changeset 13285:e02b6d3917e4

700 mercurial based tools could be faster Reviewed by: trisk@nexenta.com Reviewed by: jason.brian.king@gmail.com Approved by: garrett@nexenta.com
author Richard Lowe <richlowe@richlowe.net>
date Fri, 11 Feb 2011 13:58:20 -0500
parents fcadd752253e
children 590780cedaf1
files usr/src/tools/onbld/Scm/Backup.py usr/src/tools/onbld/Scm/WorkSpace.py usr/src/tools/onbld/hgext/cdm.py
diffstat 3 files changed, 1002 insertions(+), 459 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/tools/onbld/Scm/Backup.py	Wed Feb 09 11:52:15 2011 -0800
+++ b/usr/src/tools/onbld/Scm/Backup.py	Fri Feb 11 13:58:20 2011 -0500
@@ -17,9 +17,10 @@
 # Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-# Copyright 2008, 2010, Richard Lowe
+# Copyright 2008, 2011, Richard Lowe
 #
 
+
 '''
 Workspace backup
 
@@ -52,6 +53,17 @@
                 $CODEMGR_WS/.hg/localtags
                 $CODEMGR_WS/.hg/patches (Mq data)
 
+            clear.tar.gz (handled by CdmClearBackup)
+                <short node>/
+                    copies of each modified or added file, as it is in
+                    this head.
+
+                 ... for each outgoing head
+
+                working/
+                     copies of each modified or added file in the
+                     working copy if any.
+
          latest -> generation#
             Newest backup generation.
 
@@ -59,7 +71,9 @@
 dirstate, are optional.
 '''
 
-import os, pwd, shutil, tarfile, time, traceback
+import grp, os, pwd, shutil, tarfile, time, traceback
+from cStringIO import StringIO
+
 from mercurial import changegroup, error, node, patch, util
 
 
@@ -95,6 +109,101 @@
                             "which the backup was taken.\n" % (msg, n))
 
 
+class CdmTarFile(tarfile.TarFile):
+    '''Tar file access + simple comparison to the filesystem, and
+    creation addition of files from Mercurial filectx objects.'''
+
+    def __init__(self, *args, **kwargs):
+        tarfile.TarFile.__init__(self, *args, **kwargs)
+        self.errorlevel = 2
+
+    def members_match_fs(self, rootpath):
+        '''Compare the contents of the tar archive to the directory
+        specified by rootpath.  Return False if they differ.
+
+        Every file in the archive must match the equivalent file in
+        the filesystem.
+
+        The existence, modification time, and size of each file are
+        compared, content is not.'''
+
+        def _member_matches_fs(member, rootpath):
+            '''Compare a single member to its filesystem counterpart'''
+            fpath = os.path.join(rootpath, member.name)
+
+            if not os.path.exists(fpath):
+                return False
+            elif ((os.path.isfile(fpath) != member.isfile()) or
+                  (os.path.isdir(fpath) != member.isdir()) or
+                  (os.path.islink(fpath) != member.issym())):
+                return False
+
+            #
+            # The filesystem may return a modification time with a
+            # fractional component (as a float), whereas the tar format
+            # only stores it to the whole second, perform the comparison
+            # using integers (truncated, not rounded)
+            #
+            elif member.mtime != int(os.path.getmtime(fpath)):
+                return False
+            elif not member.isdir() and member.size != os.path.getsize(fpath):
+                return False
+            else:
+                return True
+
+        for elt in self:
+            if not _member_matches_fs(elt, rootpath):
+                return False
+
+        return True
+
+    def addfilectx(self, filectx, path=None):
+        '''Add a filectx object to the archive.
+
+        Use the path specified by the filectx object or, if specified,
+        the PATH argument.
+
+        The size, modification time, type and permissions of the tar
+        member are taken from the filectx object, user and group id
+        are those of the invoking user, user and group name are those
+        of the invoking user if information is available, or "unknown"
+        if it is not.
+        '''
+
+        t = tarfile.TarInfo(path or filectx.path())
+        t.size = filectx.size()
+        t.mtime = filectx.date()[0]
+        t.uid = os.getuid()
+        t.gid = os.getgid()
+
+        try:
+            t.uname = pwd.getpwuid(t.uid).pw_name
+        except KeyError:
+            t.uname = "unknown"
+
+        try:
+            t.gname = grp.getgrgid(t.gid).gr_name
+        except KeyError:
+            t.gname = "unknown"
+
+        #
+        # Mercurial versions symlinks by setting a flag and storing
+        # the destination path in place of the file content.  The
+        # actual contents (in the tar), should be empty.
+        #
+        if 'l' in filectx.flags():
+            t.type = tarfile.SYMTYPE
+            t.mode = 0777
+            t.linkname = filectx.data()
+            data = None
+        else:
+            t.type = tarfile.REGTYPE
+            t.mode = 'x' in filectx.flags() and 0755 or 0644
+            data = StringIO(filectx.data())
+
+        self.addfile(t, data)
+
+
 class CdmCommittedBackup(object):
     '''Backup of committed changes'''
 
@@ -130,50 +239,53 @@
         changegroup.writebundle(cg, self.bu.backupfile('bundle'), 'HG10BZ')
 
         outnodes = self._outgoing_nodes(parent)
-        if outnodes:
-            fp = None
+        if not outnodes:
+            return
+
+        fp = None
+        try:
             try:
-                try:
-                    fp = open(self.bu.backupfile('nodes'), 'w')
-                    fp.write('%s\n' % '\n'.join(outnodes))
-                except EnvironmentError, e:
-                    raise util.Abort("couldn't store outgoing nodes: %s" % e)
-            finally:
-                if fp and not fp.closed:
-                    fp.close()
+                fp = self.bu.open('nodes', 'w')
+                fp.write('%s\n' % '\n'.join(outnodes))
+            except EnvironmentError, e:
+                raise util.Abort("couldn't store outgoing nodes: %s" % e)
+        finally:
+            if fp and not fp.closed:
+                fp.close()
 
     def restore(self):
         '''Restore committed changes from backup'''
-        bfile = self.bu.backupfile('bundle')
+
+        if not self.bu.exists('bundle'):
+            return
 
-        if os.path.exists(bfile):
-            f = None
+        bpath = self.bu.backupfile('bundle')
+        f = None
+        try:
             try:
-                try:
-                    f = open(bfile, 'r')
-                    bundle = changegroup.readbundle(f, bfile)
-                    self.ws.repo.addchangegroup(bundle, 'strip',
-                                                'bundle:%s' % bfile)
-                except EnvironmentError, e:
-                    raise util.Abort("couldn't restore committed changes: %s\n"
-                                     "   %s" % (bfile, e))
-                except error.LookupError, e:
-                    raise CdmNodeMissing("couldn't restore committed changes",
-                                                     e.name)
-            finally:
-                if f and not f.closed:
-                    f.close()
+                f = self.bu.open('bundle')
+                bundle = changegroup.readbundle(f, bpath)
+                self.ws.repo.addchangegroup(bundle, 'strip',
+                                            'bundle:%s' % bpath)
+            except EnvironmentError, e:
+                raise util.Abort("couldn't restore committed changes: %s\n"
+                                 "   %s" % (bpath, e))
+            except error.LookupError, e:
+                raise CdmNodeMissing("couldn't restore committed changes",
+                                                 e.name)
+        finally:
+            if f and not f.closed:
+                f.close()
 
     def need_backup(self):
         '''Compare backup of committed changes to workspace'''
 
-        if os.path.exists(self.bu.backupfile('nodes')):
+        if self.bu.exists('nodes'):
             f = None
             try:
                 try:
-                    f = open(self.bu.backupfile('nodes'))
-                    bnodes = set([line.rstrip('\r\n')
-                                  for line in f.readlines()])
+                    f = self.bu.open('nodes')
+                    bnodes = set(line.rstrip('\r\n') for line in f.readlines())
                     f.close()
                 except EnvironmentError, e:
                     raise util.Abort("couldn't open backup node list: %s" % e)
@@ -184,7 +296,13 @@
             bnodes = set()
 
         outnodes = set(self._outgoing_nodes(self.ws.parent()))
-        if outnodes != bnodes:
+
+        #
+        # If there are outgoing nodes not in the prior backup we need
+        # to take a new backup; it's fine if there are nodes in the
+        # old backup which are no longer outgoing, however.
+        #
+        if not outnodes <= bnodes:
             return True
 
         return False
@@ -192,9 +310,8 @@
     def cleanup(self):
         '''Remove backed up committed changes'''
 
-        for fname in self.files:
-            if os.path.exists(self.bu.backupfile(fname)):
-                os.unlink(self.bu.backupfile(fname))
+        for f in self.files:
+            self.bu.unlink(f)
 
 
 class CdmUncommittedBackup(object):
@@ -203,21 +320,22 @@
     def __init__(self, backup, ws):
         self.ws = ws
         self.bu = backup
+        self.wctx = self.ws.workingctx(worklist=True)
 
     def _clobbering_renames(self):
         '''Return a list of pairs of files representing renames/copies
-        that clobber already versioned files.  [(oldname newname)...]'''
+        that clobber already versioned files.  [(old-name new-name)...]
+        '''
 
         #
         # Note that this doesn't handle uncommitted merges
         # as CdmUncommittedBackup itself doesn't.
         #
-        wctx = self.ws.workingctx()
-        parent = wctx.parents()[0]
+        parent = self.wctx.parents()[0]
 
         ret = []
-        for fname in wctx.added() + wctx.modified():
-            rn = wctx.filectx(fname).renamed()
+        for fname in self.wctx.added() + self.wctx.modified():
+            rn = self.wctx.filectx(fname).renamed()
             if rn and fname in parent:
                 ret.append((rn[0], fname))
         return ret
@@ -229,35 +347,29 @@
             raise util.Abort("Unable to backup an uncommitted merge.\n"
                              "Please complete your merge and commit")
 
-        dirstate = node.hex(self.ws.workingctx().parents()[0].node())
+        dirstate = node.hex(self.wctx.parents()[0].node())
 
         fp = None
         try:
             try:
-                fp = open(self.bu.backupfile('dirstate'), 'w')
+                fp = self.bu.open('dirstate', 'w')
                 fp.write(dirstate + '\n')
+                fp.close()
             except EnvironmentError, e:
                 raise util.Abort("couldn't save working copy parent: %s" % e)
-        finally:
-            if fp and not fp.closed:
-                fp.close()
 
-        try:
             try:
-                fp = open(self.bu.backupfile('renames'), 'w')
+                fp = self.bu.open('renames', 'w')
                 for cons in self._clobbering_renames():
                     fp.write("%s %s\n" % cons)
+                fp.close()
             except EnvironmentError, e:
                 raise util.Abort("couldn't save clobbering copies: %s" % e)
-        finally:
-            if fp and not fp.closed:
-                fp.close()
 
-        try:
             try:
-                fp = open(self.bu.backupfile('diff'), 'w')
-                opts = patch.diffopts(self.ws.ui, opts={'git': True})
-                fp.write(self.ws.diff(opts=opts))
+                fp = self.bu.open('diff', 'w')
+                match = self.ws.matcher(files=self.wctx.files())
+                fp.write(self.ws.diff(opts={'git': True}, match=match))
             except EnvironmentError, e:
                 raise util.Abort("couldn't save working copy diff: %s" % e)
         finally:
@@ -269,18 +381,18 @@
         fp = None
         try:
             try:
-                fp = open(self.bu.backupfile('dirstate'))
+                fp = self.bu.open('dirstate')
                 dirstate = fp.readline().strip()
-                return dirstate
             except EnvironmentError, e:
                 raise util.Abort("couldn't read saved parent: %s" % e)
         finally:
             if fp and not fp.closed:
                 fp.close()
 
+        return dirstate
+
     def restore(self):
         '''Restore uncommitted changes'''
-        diff = self.bu.backupfile('diff')
         dirstate = self._dirstate()
 
         #
@@ -298,14 +410,14 @@
         except util.Abort, e:
             raise util.Abort("couldn't update to saved node: %s" % e)
 
-        if not os.path.exists(diff):
+        if not self.bu.exists('diff'):
             return
 
         #
         # There's a race here whereby if the patch (or part thereof)
         # is applied within the same second as the clean above (such
-        # that mtime doesn't change) and if the size of that file
-        # does not change, Hg may not see the change.
+        # that modification time doesn't change) and if the size of
+        # that file does not change, Hg may not see the change.
         #
         # We sleep a full second to avoid this, as sleeping merely
         # until the next second begins would require very close clock
@@ -315,6 +427,7 @@
 
         files = {}
         try:
+            diff = self.bu.backupfile('diff')
             try:
                 fuzz = patch.patch(diff, self.ws.ui, strip=1,
                                    cwd=self.ws.repo.root, files=files)
@@ -326,7 +439,7 @@
         finally:
             patch.updatedir(self.ws.ui, self.ws.repo, files)
 
-        if not os.path.exists(self.bu.backupfile('renames')):
+        if not self.bu.exists('renames'):
             return
 
         #
@@ -335,7 +448,7 @@
         # Hg would otherwise ignore them.
         #
         try:
-            fp = open(self.bu.backupfile('renames'))
+            fp = self.bu.open('renames')
             for line in fp:
                 source, dest = line.strip().split()
                 self.ws.copy(source, dest)
@@ -347,57 +460,54 @@
 
     def need_backup(self):
         '''Compare backup of uncommitted changes to workspace'''
-        cnode = self.ws.workingctx().parents()[0].node()
+        cnode = self.wctx.parents()[0].node()
         if self._dirstate() != node.hex(cnode):
             return True
 
-        opts = patch.diffopts(self.ws.ui, opts={'git': True})
-        curdiff = self.ws.diff(opts=opts)
+        fd = None
+        match = self.ws.matcher(files=self.wctx.files())
+        curdiff = self.ws.diff(opts={'git': True}, match=match)
 
-        diff = self.bu.backupfile('diff')
-        if os.path.exists(diff):
-            try:
+        try:
+            if self.bu.exists('diff'):
                 try:
-                    fd = open(diff)
+                    fd = self.bu.open('diff')
                     backdiff = fd.read()
+                    fd.close()
                 except EnvironmentError, e:
                     raise util.Abort("couldn't open backup diff %s\n"
-                                     "   %s" % (diff, e))
-            finally:
-                if fd and not fd.closed:
-                    fd.close()
-        else:
-            backdiff = ''
+                                     "   %s" % (self.bu.backupfile('diff'), e))
+            else:
+                backdiff = ''
 
-        if backdiff != curdiff:
-            return True
+            if backdiff != curdiff:
+                return True
 
-
-        currrenamed = self._clobbering_renames()
-        bakrenamed = None
+            currrenamed = self._clobbering_renames()
+            bakrenamed = None
 
-        if os.path.exists(self.bu.backupfile('renames')):
-            try:
+            if self.bu.exists('renames'):
                 try:
-                    fd = open(self.bu.backupfile('renames'))
-                    bakrenamed = [line.strip().split(' ') for line in fd]
+                    fd = self.bu.open('renames')
+                    bakrenamed = [tuple(line.strip().split(' ')) for line in fd]
+                    fd.close()
                 except EnvironmentError, e:
                     raise util.Abort("couldn't open renames file %s: %s\n" %
                                      (self.bu.backupfile('renames'), e))
-            finally:
-                if fd and not fd.closed:
-                    fd.close()
 
             if currrenamed != bakrenamed:
                 return True
+        finally:
+            if fd and not fd.closed:
+                fd.close()
 
         return False
 
     def cleanup(self):
         '''Remove backed up uncommitted changes'''
-        for fname in ('dirstate', 'diff', 'renames'):
-            if os.path.exists(self.bu.backupfile(fname)):
-                os.unlink(self.bu.backupfile(fname))
+
+        for f in ('dirstate', 'diff', 'renames'):
+            self.bu.unlink(f)
 
 
 class CdmMetadataBackup(object):
@@ -411,48 +521,51 @@
     def backup(self):
         '''Backup workspace metadata'''
 
-        tar = None
+        tarpath = self.bu.backupfile('metadata.tar.gz')
+
+        #
+        # Files is a list of tuples (name, path), where name is as in
+        # self.files, and path is the absolute path.
+        #
+        files = filter(lambda (name, path): os.path.exists(path),
+                       zip(self.files, map(self.ws.repo.join, self.files)))
+
+        if not files:
+            return
 
         try:
-            try:
-                tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'),
-                                   'w:gz')
-                tar.errorlevel = 2
-            except (EnvironmentError, tarfile.TarError), e:
-                raise util.Abort("couldn't open %s for writing: %s" %
-                                 (self.bu.backupfile('metadata.tar.gz'), e))
+            tar = CdmTarFile.gzopen(tarpath, 'w')
+        except (EnvironmentError, tarfile.TarError), e:
+            raise util.Abort("couldn't open %s for writing: %s" %
+                             (tarpath, e))
 
-            try:
-                for elt in self.files:
-                    fpath = self.ws.repo.join(elt)
-                    if os.path.exists(fpath):
-                        tar.add(fpath, elt)
-            except (EnvironmentError, tarfile.TarError), e:
-                #
-                # tarfile.TarError doesn't include the tar member or file
-                # in question, so we have to do so ourselves.
-                #
-                if isinstance(e, tarfile.TarError):
-                    errstr = "%s: %s" % (elt, e)
-                else:
-                    errstr = str(e)
+        try:
+            for name, path in files:
+                try:
+                    tar.add(path, name)
+                except (EnvironmentError, tarfile.TarError), e:
+                    #
+                    # tarfile.TarError doesn't include the tar member or file
+                    # in question, so we have to do so ourselves.
+                    #
+                    if isinstance(e, tarfile.TarError):
+                        errstr = "%s: %s" % (name, e)
+                    else:
+                        errstr = str(e)
 
-                raise util.Abort("couldn't backup metadata to %s:\n"
-                                 "  %s" %
-                                 (self.bu.backupfile('metadata.tar.gz'),
-                                  errstr))
+                    raise util.Abort("couldn't backup metadata to %s:\n"
+                                     "  %s" % (tarpath, errstr))
         finally:
-            if tar and not tar.closed:
-                tar.close()
+            tar.close()
 
     def old_restore(self):
         '''Restore workspace metadata from an pre-tar backup'''
 
         for fname in self.files:
-            bfile = self.bu.backupfile(fname)
-            wfile = self.ws.repo.join(fname)
+            if self.bu.exists(fname):
+                bfile = self.bu.backupfile(fname)
+                wfile = self.ws.repo.join(fname)
 
-            if os.path.exists(bfile):
                 try:
                     shutil.copy2(bfile, wfile)
                 except EnvironmentError, e:
@@ -462,20 +575,20 @@
     def tar_restore(self):
         '''Restore workspace metadata (from a tar-style backup)'''
 
-        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
-            tar = None
+        if not self.bu.exists('metadata.tar.gz'):
+            return
+
+        tarpath = self.bu.backupfile('metadata.tar.gz')
 
-            try:
+        try:
+            tar = CdmTarFile.gzopen(tarpath)
+        except (EnvironmentError, tarfile.TarError), e:
+            raise util.Abort("couldn't open %s: %s" % (tarpath, e))
+
+        try:
+            for elt in tar:
                 try:
-                    tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'))
-                    tar.errorlevel = 2
-                except (EnvironmentError, tarfile.TarError), e:
-                    raise util.Abort("couldn't open %s: %s" %
-                                 (self.bu.backupfile('metadata.tar.gz'), e))
-
-                try:
-                    for elt in tar:
-                        tar.extract(elt, path=self.ws.repo.path)
+                    tar.extract(elt, path=self.ws.repo.path)
                 except (EnvironmentError, tarfile.TarError), e:
                     # Make sure the member name is in the exception message.
                     if isinstance(e, tarfile.TarError):
@@ -485,87 +598,182 @@
 
                     raise util.Abort("couldn't restore metadata from %s:\n"
                                      "   %s" %
-                                     (self.bu.backupfile('metadata.tar.gz'),
-                                      errstr))
-            finally:
-                if tar and not tar.closed:
-                    tar.close()
+                                     (tarpath, errstr))
+        finally:
+            if tar and not tar.closed:
+                tar.close()
 
     def restore(self):
         '''Restore workspace metadata'''
 
-        if os.path.exists(self.bu.backupfile('hgrc')):
+        if self.bu.exists('hgrc'):
             self.old_restore()
         else:
             self.tar_restore()
 
+    def _walk(self):
+        '''Yield the repo-relative path to each file we operate on,
+        including each file within any affected directory'''
+
+        for elt in self.files:
+            path = self.ws.repo.join(elt)
+
+            if not os.path.exists(path):
+                continue
+
+            if os.path.isdir(path):
+                for root, dirs, files in os.walk(path, topdown=True):
+                    yield root
+
+                    for f in files:
+                        yield os.path.join(root, f)
+            else:
+                yield path
+
     def need_backup(self):
         '''Compare backed up workspace metadata to workspace'''
 
-        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
+        def strip_trailing_pathsep(pathname):
+            '''Remove a possible trailing path separator from PATHNAME'''
+            return pathname.endswith('/') and pathname[:-1] or pathname
+
+        if self.bu.exists('metadata.tar.gz'):
+            tarpath = self.bu.backupfile('metadata.tar.gz')
             try:
-                tar = tarfile.open(self.bu.backupfile('metadata.tar.gz'))
-                tar.errorlevel = 2
+                tar = CdmTarFile.gzopen(tarpath)
             except (EnvironmentError, tarfile.TarError), e:
                 raise util.Abort("couldn't open metadata tarball: %s\n"
-                                 "   %s" %
-                                 (self.bu.backupfile('metadata.tar.gz'), e))
-
-            for elt in tar:
-                fpath = self.ws.repo.join(elt.name)
-                if not os.path.exists(fpath):
-                    return True     # File in tar, not workspace
-
-                if elt.isdir():     # Don't care about directories
-                    continue
+                                 "   %s" % (tarpath, e))
 
-                #
-                # The filesystem can give us mtime with fractional seconds
-                # (as a float), whereas tar files only keep it to the second.
-                #
-                # Always compare to the integer (second-granularity) mtime.
-                #
-                if (elt.mtime != int(os.path.getmtime(fpath)) or
-                    elt.size != os.path.getsize(fpath)):
-                    return True
+            if not tar.members_match_fs(self.ws.repo.path):
+                tar.close()
+                return True
 
-            tarnames = tar.getnames()
+            tarnames = map(strip_trailing_pathsep, tar.getnames())
             tar.close()
         else:
             tarnames = []
 
-        for mfile in self.files:
-            fpath = self.ws.repo.join(mfile)
-
-            if os.path.isdir(fpath):
-                # Directories in tarfile always end with a '/'
-                if not mfile.endswith('/'):
-                    mfile += '/'
-
-                if mfile not in tarnames:
-                    return True
+        repopath = self.ws.repo.path
+        if not repopath.endswith('/'):
+            repopath += '/'
 
-                for root, dirs, files in os.walk(fpath, topdown=True):
-                    for elt in files:
-                        path = os.path.join(root, elt)
-
-                        rpath = self.ws.repo.path
-                        if not rpath.endswith('/'):
-                            rpath += '/'
-
-                        path = path.replace(rpath, '', 1)
-                        if path not in tarnames:
-                            return True # In workspace not tar
-            else:
-                if os.path.exists(fpath) and mfile not in tarnames:
-                    return True
+        for path in self._walk():
+            if path.replace(repopath, '', 1) not in tarnames:
+                return True
 
         return False
 
     def cleanup(self):
         '''Remove backed up workspace metadata'''
-        if os.path.exists(self.bu.backupfile('metadata.tar.gz')):
-            os.unlink(self.bu.backupfile('metadata.tar.gz'))
+        self.bu.unlink('metadata.tar.gz')
+
+
+class CdmClearBackup(object):
+    '''A backup (in tar format) of complete source files from every
+    workspace head.
+
+    Paths in the tarball are prefixed by the revision and node of the
+    head, or "working" for the working directory.
+
+    This is done purely for the benefit of the user, and as such takes
+    no part in restore or need_backup checking, restore always
+    succeeds, need_backup always returns False
+    '''
+
+    def __init__(self, backup, ws):
+        self.bu = backup
+        self.ws = ws
+
+    def _branch_pairs(self):
+        '''Return a list of tuples (parenttip, localtip) for each
+        outgoing head.  If the working copy contains modified files,
+        it is a head, and neither of its parents are.
+        '''
+
+        parent = self.ws.parent()
+
+        if parent:
+            outgoing = self.ws.findoutgoing(parent)
+            outnodes = set(self.ws.repo.changelog.nodesbetween(outgoing)[0])
+
+            heads = [self.ws.repo.changectx(n) for n in self.ws.repo.heads()
+                     if n in outnodes]
+        else:
+            heads = []
+            outnodes = []
+
+        wctx = self.ws.workingctx()
+        if wctx.files():        # We only care about file changes.
+            heads = filter(lambda x: x not in wctx.parents(), heads) + [wctx]
+
+        pairs = []
+        for head in heads:
+            if head.rev() is None:
+                c = head.parents()
+            else:
+                c = [head]
+
+            pairs.append((self.ws.parenttip(c, outnodes), head))
+        return pairs
+
+    def backup(self):
+        '''Save a clear copy of each source file modified between each
+        head and that head's parenttip (see WorkSpace.parenttip).
+        '''
+
+        tarpath = self.bu.backupfile('clear.tar.gz')
+        branches = self._branch_pairs()
+
+        if not branches:
+            return
+
+        try:
+            tar = CdmTarFile.gzopen(tarpath, 'w')
+        except (EnvironmentError, tarfile.TarError), e:
+            raise util.Abort("Could not open %s for writing: %s" %
+                             (tarpath, e))
+
+        try:
+            for parent, child in branches:
+                tpath = child.node() and node.short(child.node()) or "working"
+
+                for fname, change in self.ws.status(parent, child).iteritems():
+                    if change not in ('added', 'modified'):
+                        continue
+
+                    try:
+                        tar.addfilectx(child.filectx(fname),
+                                       os.path.join(tpath, fname))
+                    except ValueError, e:
+                        crev = child.rev()
+                        if crev is None:
+                            crev = "working copy"
+                        raise util.Abort("Could not backup clear file %s "
+                                         "from %s: %s\n" % (fname, crev, e))
+        finally:
+            tar.close()
+
+    def cleanup(self):
+        '''Cleanup a failed Clear backup.
+
+        Remove the clear tarball from the backup directory.
+        '''
+
+        self.bu.unlink('clear.tar.gz')
+
+    def restore(self):
+        '''Clear backups are never restored, do nothing'''
+        pass
+
+    def need_backup(self):
+        '''Clear backups are never compared, return False (no backup needed).
+
+        Should a backup actually be needed, one of the other
+        implementation classes would notice in any situation we would.
+        '''
+
+        return False
 
 
 class CdmBackup(object):
@@ -589,9 +797,9 @@
         #
         self.modules = [x(self, ws) for x in [CdmCommittedBackup,
                                               CdmUncommittedBackup,
+                                              CdmClearBackup,
                                               CdmMetadataBackup]]
 
-
         if os.path.exists(os.path.join(self.backupdir, 'latest')):
             generation = os.readlink(os.path.join(self.backupdir, 'latest'))
             self.generation = int(os.path.split(generation)[1])
@@ -600,8 +808,6 @@
 
     def _find_backup_dir(self, name):
         '''Find the path to an appropriate backup directory based on NAME'''
-        backupdir = None
-        backupbase = None
 
         if os.path.isabs(name):
             return name
@@ -630,11 +836,7 @@
 
         return backupdir
 
-    def backupfile(self, path):
-        '''return full path to backup file FILE at GEN'''
-        return os.path.join(self.backupdir, str(self.generation), path)
-
-    def update_latest(self, gen):
+    def _update_latest(self, gen):
         '''Update latest symlink to point to the current generation'''
         linkpath = os.path.join(self.backupdir, 'latest')
 
@@ -643,15 +845,37 @@
 
         os.symlink(str(gen), linkpath)
 
-    def create_gen(self, gen):
+    def _create_gen(self, gen):
         '''Create a new backup generation'''
         try:
             os.makedirs(os.path.join(self.backupdir, str(gen)))
-            self.update_latest(gen)
+            self._update_latest(gen)
         except EnvironmentError, e:
             raise util.Abort("Couldn't create backup generation %s: %s" %
                              (os.path.join(self.backupdir, str(gen)), e))
 
+    def backupfile(self, path):
+        '''return full path to backup file FILE at GEN'''
+        return os.path.join(self.backupdir, str(self.generation), path)
+
+    def unlink(self, name):
+        '''Unlink the specified path from the backup directory.
+        A no-op if the path does not exist.
+        '''
+
+        fpath = self.backupfile(name)
+        if os.path.exists(fpath):
+            os.unlink(fpath)
+
+    def open(self, name, mode='r'):
+        '''Open the specified file in the backup directory'''
+        return open(self.backupfile(name), mode)
+
+    def exists(self, name):
+        '''Return boolean indicating whether a given file exists in the
+        backup directory.'''
+        return os.path.exists(self.backupfile(name))
+
     def need_backup(self):
         '''Compare backed up changes to workspace'''
         #
@@ -659,8 +883,7 @@
         # invalid (lacking the dirstate file), we need a backup regardless
         # of anything else.
         #
-        if (not self.generation or
-            not os.path.exists(self.backupfile('dirstate'))):
+        if not self.generation or not self.exists('dirstate'):
             return True
 
         for x in self.modules:
@@ -683,7 +906,7 @@
                                  (self.backupdir, e))
 
         self.generation += 1
-        self.create_gen(self.generation)
+        self._create_gen(self.generation)
 
         try:
             for x in self.modules:
@@ -712,7 +935,7 @@
             self.generation -= 1
 
             if self.generation != 0:
-                self.update_latest(self.generation)
+                self._update_latest(self.generation)
             else:
                 os.unlink(os.path.join(self.backupdir, 'latest'))
 
@@ -737,10 +960,10 @@
                                  (os.path.join(self.backupdir, str(gen))))
             self.generation = int(gen)
 
-        if not self.generation: # This is ok, 0 is not a valid generation
+        if not self.generation: # This is OK, 0 is not a valid generation
             raise util.Abort('Backup has no generations: %s' % self.backupdir)
 
-        if not os.path.exists(self.backupfile('dirstate')):
+        if not self.exists('dirstate'):
             raise util.Abort('Backup %s/%s is incomplete (dirstate missing)' %
                              (self.backupdir, self.generation))
 
--- a/usr/src/tools/onbld/Scm/WorkSpace.py	Wed Feb 09 11:52:15 2011 -0800
+++ b/usr/src/tools/onbld/Scm/WorkSpace.py	Fri Feb 11 13:58:20 2011 -0500
@@ -15,7 +15,7 @@
 
 #
 # Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright 2008, 2010, Richard Lowe
+# Copyright 2008, 2011, Richard Lowe
 #
 
 #
@@ -37,10 +37,10 @@
 # was renamed or merely copied.  Each changed file has an
 # associated ActiveEntry.
 #
-# The ActiveList being a list ActiveEntrys can thus present the entire
-# change in workspace state between a parent and its child, and is the
-# important bit here (in that if it is incorrect, everything else will
-# be as incorrect, or more)
+# The ActiveList, being a list of ActiveEntry objects, can thus
+# present the entire change in workspace state between a parent and
+# its child and is the important bit here (in that if it is incorrect,
+# everything else will be as incorrect, or more)
 #
 
 import cStringIO
@@ -50,6 +50,7 @@
 
 from onbld.Scm import Version
 
+
 #
 # Mercurial 1.6 moves findoutgoing into a discover module
 #
@@ -73,208 +74,224 @@
 
     .is_<change>() methods.'''
 
-    MODIFIED = 1
-    ADDED = 2
-    REMOVED = 3
+    MODIFIED = intern('modified')
+    ADDED = intern('added')
+    REMOVED = intern('removed')
 
-    def __init__(self, name):
+    def __init__(self, name, change):
         self.name = name
-        self.change = None
+        self.change = intern(change)
+
+        assert change in (self.MODIFIED, self.ADDED, self.REMOVED)
+
         self.parentname = None
         # As opposed to copied (or neither)
         self.renamed = False
         self.comments = []
 
-    #
-    # ActiveEntrys sort by the name of the file they represent.
-    #
     def __cmp__(self, other):
         return cmp(self.name, other.name)
 
     def is_added(self):
-        return self.change == self.ADDED
+        '''Return True if this ActiveEntry represents an added file'''
+        return self.change is self.ADDED
 
     def is_modified(self):
-        return self.change == self.MODIFIED
+        '''Return True if this ActiveEntry represents a modified file'''
+        return self.change is self.MODIFIED
 
     def is_removed(self):
-        return self.change == self.REMOVED
+        '''Return True if this ActiveEntry represents a removed file'''
+        return self.change is self.REMOVED
 
     def is_renamed(self):
+        '''Return True if this ActiveEntry represents a renamed file'''
         return self.parentname and self.renamed
 
     def is_copied(self):
+        '''Return True if this ActiveEntry represents a copied file'''
         return self.parentname and not self.renamed
 
 
 class ActiveList(object):
-    '''Complete representation of workspace change.
+    '''Complete representation of change between two changesets.
 
-    In practice, a container for ActiveEntrys, and methods to build them,
-    update them, and deal with them en masse.'''
+    In practice, a container for ActiveEntry objects, and methods to
+    create them, and deal with them as a group.'''
 
     def __init__(self, ws, parenttip, revs=None):
-        self._active = {}
-        self.ws = ws
-
-        self.revs = revs
-
-        self.base = None
-        self.parenttip = parenttip
+        '''Initialize the ActiveList
 
-        #
-        # If we couldn't find a parenttip, the two repositories must
-        # be unrelated (Hg catches most of this, but this case is valid for it
-        # but invalid for us)
-        #
-        if self.parenttip == None:
-            raise util.Abort('repository is unrelated')
+        parenttip is the revision with which to compare (likely to be
+        from the parent), revs is a topologically sorted list of
+        revisions ending with the revision to compare with (likely to
+        be the child-local revisions).'''
+
+        assert parenttip is not None
+
+        self.ws = ws
+        self.revs = revs
+        self.parenttip = parenttip
         self.localtip = None
 
-        if revs:
-            self.base = revs[0]
-            self.localtip = revs[-1]
-
+        self._active = {}
         self._comments = []
 
-        self._build(revs)
-
-    def _build(self, revs):
-        if not revs:
-            return
+        if revs:
+            self.localtip = revs[-1]
+            self._build()
 
-        status = self.ws.status(self.parenttip.node(), self.localtip.node())
+    def _status(self):
+        '''Return the status of any file mentioned in any of the
+        changesets making up this active list.'''
 
-        files = []
-        for ctype in status.values():
-            files.extend(ctype)
+        files = set()
+        for c in self.revs:
+            files.update(c.files())
 
         #
-        # When a file is renamed, two operations actually occur.
-        # A file copy from source to dest and a removal of source.
+        # Any file not in the parenttip or the localtip is ephemeral
+        # and can be ignored. Mercurial will complain regarding these
+        # files if the localtip is a workingctx, so remove them in
+        # that case.
         #
-        # These are represented as two distinct entries in the
-        # changectx and status (one on the dest file for the
-        # copy, one on the source file for the remove).
+        # Compare against the dirstate because a workingctx manifest
+        # is created on-demand and is particularly expensive.
         #
-        # Since these are unconnected in both the context and
-        # status we can only make the association by explicitly
-        # looking for it.
-        #
-        # We deal with this thusly:
-        #
-        # We maintain a dict dest -> source of all copies
-        # (updating dest as appropriate, but leaving source alone).
-        #
-        # After all other processing, we mark as renamed any pair
-        # where source is on the removed list.
-        #
-        copies = {}
+        if self.localtip.rev() is None:
+            for f in files.copy():
+                if f not in self.parenttip and f not in self.ws.repo.dirstate:
+                    files.remove(f)
+
+        return self.ws.status(self.parenttip, self.localtip, files=files)
+
+    def _build(self):
+        '''Construct ActiveEntry objects for each changed file.
+
+        This works in 3 stages:
+
+          - Create entries for every changed file with
+            semi-appropriate change type
+
+          - Track renames/copies, and set change comments (both
+            ActiveList-wide, and per-file).
+
+          - Cleanup
+            - Drop circular renames
+            - Drop the removal of the old name of any rename
+            - Drop entries for modified files that haven't actually changed'''
 
         #
-        # Walk revs looking for renames and adding files that
-        # are in both change context and status to the active
-        # list.
+        # Keep a cache of filectx objects (keyed on pathname) so that
+        # we can avoid opening filelogs numerous times.
         #
-        for ctx in revs:
-            desc = ctx.description().splitlines()
-
-            self._comments.extend(desc)
+        fctxcache = {}
 
-            for fname in ctx.files():
-                #
-                # We store comments per-entry as well, for the sake of
-                # webrev and similar.  We store twice to avoid the problems
-                # of uniquifying comments for the general list (and possibly
-                # destroying multi-line entities in the process).
-                #
-                if fname not in self:
-                    self._addentry(fname)
-                self[fname].comments.extend(desc)
-
-                try:
+        def oldname(ctx, fname):
+            '''Return the name 'fname' held prior to any possible
+            rename/copy in the given changeset.'''
+            try:
+                if fname in fctxcache:
+                    octx = fctxcache[fname]
+                    fctx = ctx.filectx(fname, filelog=octx.filelog())
+                else:
                     fctx = ctx.filectx(fname)
-                except error.LookupError:
-                    continue
-
-                #
-                # NB: .renamed() is a misnomer, this actually checks
-                #     for copies.
-                #
-                rn = fctx.renamed()
-                if rn:
+                    #
+                    # workingfilectx objects may not refer to the
+                    # right filelog (in case of rename).  Don't cache
+                    # them.
                     #
-                    # If the source file is a known copy we know its
-                    # ancestry leads us to the parent.
-                    # Otherwise make sure the source file is known to
-                    # be in the parent, we need not care otherwise.
-                    #
-                    # We detect cycles at a later point.  There is no
-                    # reason to continuously handle them.
-                    #
-                    if rn[0] in copies:
-                        copies[fname] = copies[rn[0]]
-                    elif rn[0] in self.parenttip.manifest():
-                        copies[fname] = rn[0]
+                    if not isinstance(fctx, context.workingfilectx):
+                        fctxcache[fname] = fctx
+            except error.LookupError:
+                return None
+
+            rn = fctx.renamed()
+            return rn and rn[0] or fname
+
+        status = self._status()
+        self._active = dict((fname, ActiveEntry(fname, kind))
+                            for fname, kind in status.iteritems()
+                            if kind in ('modified', 'added', 'removed'))
 
         #
-        # Walk the copy list marking as copied any non-cyclic pair
-        # where the destination file is still present in the local
-        # tip (to avoid ephemeral changes)
-        #
-        # Where source is removed, mark as renamed, and remove the
-        # AL entry for the source file
+        # We do two things:
+        #    - Gather checkin comments (for the entire ActiveList, and
+        #      per-file)
+        #    - Set the .parentname of any copied/renamed file
         #
-        for fname, oldname in copies.iteritems():
-            if fname == oldname or fname not in self.localtip.manifest():
-                continue
-
-            self[fname].parentname = oldname
-
-            if oldname in status['removed']:
-                self[fname].renamed = True
-                if oldname in self:
-                    del self[oldname]
-
+        # renames/copies:
+        #   We walk the list of revisions backward such that only files
+        #   that ultimately remain active need be considered.
+        #
+        #   At each iteration (revision) we update the .parentname of
+        #   any active file renamed or copied in that revision (the
+        #   current .parentname if set, or .name otherwise, reflects
+        #   the name of a given active file in the revision currently
+        #   being looked at)
         #
-        # Walk the active list setting the change type for each active
-        # file.
-        #
-        # In the case of modified files that are not renames or
-        # copies, we do a content comparison, and drop entries that
-        # are not actually modified.
-        #
-        # We walk a copy of the AL such that we can drop entries
-        # within the loop.
-        #
+        for ctx in reversed(self.revs):
+            desc = ctx.description().splitlines()
+            self._comments = desc + self._comments
+            cfiles = set(ctx.files())
+
+            for entry in self:
+                fname = entry.parentname or entry.name
+                if fname not in cfiles:
+                    continue
+
+                entry.comments = desc + entry.comments
+
+                #
+                # We don't care about the name history of any file
+                # that ends up being removed, since that trumps any
+                # possible renames or copies along the way.
+                #
+                # Changes that we may care about involving an
+                # intermediate name of a removed file will appear
+                # separately (related to the eventual name along
+                # that line)
+                #
+                if not entry.is_removed():
+                    entry.parentname = oldname(ctx, fname)
+
         for entry in self._active.values():
-            if entry.name not in files:
-                del self[entry.name]
-                continue
+            #
+            # For any file marked as copied or renamed, clear the
+            # .parentname if the copy or rename is cyclic (source ==
+            # destination) or if the .parentname did not exist in the
+            # parenttip.
+            #
+            # If the parentname is marked as removed, set the renamed
+            # flag and remove any ActiveEntry we may have for the
+            # .parentname.
+            #
+            if entry.parentname:
+                if (entry.parentname == entry.name or
+                    entry.parentname not in self.parenttip):
+                    entry.parentname = None
+                elif status.get(entry.parentname) == 'removed':
+                    entry.renamed = True
 
-            if entry.name in status['added']:
-                entry.change = ActiveEntry.ADDED
-            elif entry.name in status['removed']:
-                entry.change = ActiveEntry.REMOVED
-            elif entry.name in status['modified']:
-                entry.change = ActiveEntry.MODIFIED
+                    if entry.parentname in self:
+                        del self[entry.parentname]
 
             #
-            # There are cases during a merge where a file will be in
-            # the status return as modified, but in reality be an
-            # addition (ie, not in the parenttip).
+            # There are cases during a merge where a file will be seen
+            # as modified by status but in reality be an addition (not
+            # in the parenttip), so we have to check whether the file
+            # is in the parenttip and set it as an addition, if not.
             #
-            # We need to check whether the file is actually present
-            # in the parenttip, and set it as an add, if not.
+            # If a file is modified (and not a copy or rename), we do
+            # a full comparison to the copy in the parenttip and
+            # ignore files that are parts of active revisions but
+            # unchanged.
             #
-            if entry.name not in self.parenttip.manifest():
+            if entry.name not in self.parenttip:
                 entry.change = ActiveEntry.ADDED
             elif entry.is_modified():
                 if not self._changed_file(entry.name):
                     del self[entry.name]
-                    continue
-
-            assert entry.change
 
     def __contains__(self, fname):
         return fname in self._active
@@ -289,12 +306,7 @@
         del self._active[key]
 
     def __iter__(self):
-        for entry in self._active.values():
-            yield entry
-
-    def _addentry(self, fname):
-        if fname not in self:
-            self[fname] = ActiveEntry(fname)
+        return self._active.itervalues()
 
     def files(self):
         '''Return the list of pathnames of all files touched by this
@@ -305,11 +317,13 @@
         '''
 
         ret = self._active.keys()
-        ret.extend([x.parentname for x in self
-                    if x.is_renamed() and x.parentname not in ret])
-        return ret
+        ret.extend(x.parentname for x in self if x.is_renamed())
+        return set(ret)
 
     def comments(self):
+        '''Return the full set of changeset comments associated with
+        this ActiveList'''
+
         return self._comments
 
     def bases(self):
@@ -391,6 +405,134 @@
         ActiveList as one change.'''
         return activectx(self, message, user)
 
+    def as_text(self, paths):
+        '''Return the ActiveList as a block of text in a format
+        intended to aid debugging and simplify the test suite.
+
+        paths should be a list of paths for which file-level data
+        should be included.  If it is empty, the whole active list is
+        included.'''
+
+        cstr = cStringIO.StringIO()
+
+        cstr.write('parent tip: %s:%s\n' % (self.parenttip.rev(),
+                                            self.parenttip))
+        if self.localtip:
+            rev = self.localtip.rev()
+            cstr.write('local tip:  %s:%s\n' %
+                       (rev is None and "working" or rev, self.localtip))
+        else:
+            cstr.write('local tip:  None\n')
+
+        cstr.write('entries:\n')
+        for entry in self:
+            if paths and self.ws.filepath(entry.name) not in paths:
+                continue
+
+            cstr.write('  - %s\n' % entry.name)
+            cstr.write('    parentname: %s\n' % entry.parentname)
+            cstr.write('    change: %s\n' % entry.change)
+            cstr.write('    renamed: %s\n' % entry.renamed)
+            cstr.write('    comments:\n')
+            cstr.write('      ' + '\n      '.join(entry.comments) + '\n')
+            cstr.write('\n')
+
+        return cstr.getvalue()
+
+
+class WorkList(object):
+    '''A (user-maintained) list of files changed in this workspace as
+    compared to any parent workspace.
+
+    Internally, the WorkList is stored in .hg/cdm/worklist as a list
+    of file pathnames, one per-line.
+
+    This may only safely be used as a hint regarding possible
+    modifications to the working copy, it should not be relied upon to
+    suggest anything about committed changes.'''
+
+    def __init__(self, ws):
+        '''Load the WorkList for the specified WorkSpace from disk.'''
+
+        self._ws = ws
+        self._repo = ws.repo
+        self._file = os.path.join('cdm', 'worklist')
+        self._files = set()
+        self._valid = False
+
+        if os.path.exists(self._repo.join(self._file)):
+            self.load()
+
+    def __nonzero__(self):
+        '''A WorkList object is true if it was loaded from disk,
+        rather than freshly created.
+        '''
+
+        return self._valid
+
+    def list(self):
+        '''List of pathnames contained in the WorkList
+        '''
+
+        return list(self._files)
+
+    def status(self):
+        '''Return the status (in tuple form) of files from the
+        WorkList as they are in the working copy
+        '''
+
+        match = self._ws.matcher(files=self.list())
+        return self._repo.status(match=match)
+
+    def add(self, fname):
+        '''Add FNAME to the WorkList.
+        '''
+
+        self._files.add(fname)
+
+    def write(self):
+        '''Write the WorkList out to disk.
+        '''
+
+        dirn = os.path.split(self._file)[0]
+
+        if dirn and not os.path.exists(self._repo.join(dirn)):
+            try:
+                os.makedirs(self._repo.join(dirn))
+            except EnvironmentError, e:
+                raise util.Abort("Couldn't create directory %s: %s" %
+                                 (self._repo.join(dirn), e))
+
+        fh = self._repo.opener(self._file, 'w', atomictemp=True)
+
+        for name in self._files:
+            fh.write("%s\n" % name)
+
+        fh.rename()
+        fh.close()
+
+    def load(self):
+        '''Read in the WorkList from disk.
+        '''
+
+        fh = self._repo.opener(self._file, 'r')
+        self._files = set(l.rstrip('\n') for l in fh)
+        self._valid = True
+        fh.close()
+
+    def delete(self):
+        '''Empty the WorkList
+
+        Remove the on-disk WorkList and clear the file-list of the
+        in-memory copy
+        '''
+
+        if os.path.exists(self._repo.join(self._file)):
+            os.unlink(self._repo.join(self._file))
+
+        self._files = set()
+        self._valid = False
+
 
 class activectx(context.memctx):
     '''Represent an ActiveList as a Mercurial context object.
@@ -477,10 +619,6 @@
         will be the most recent head on the current branch.
         '''
 
-        #
-        # A modified working copy is seen as a proto-branch, and thus
-        # our only option as the local tip.
-        #
         if (wctx.files() or len(wctx.parents()) > 1 or
             wctx.branch() != wctx.parents()[0].branch()):
             return wctx
@@ -497,13 +635,14 @@
 
         return ltip
 
-    def _parenttip(self, heads, outgoing):
+    def parenttip(self, heads, outgoing):
         '''Return the highest-numbered, non-outgoing changeset that is
         an ancestor of a changeset in heads.
 
-        This is intended to find the most recent changeset on a given
-        branch that is shared between a parent and child workspace,
-        such that it can act as a stand-in for the parent workspace.
+        This returns the most recent changeset on a given branch that
+        is shared between a parent and child workspace, in effect the
+        common ancestor of the chosen local tip and the parent
+        workspace.
         '''
 
         def tipmost_shared(head, outnodes):
@@ -529,15 +668,20 @@
         ptips = map(lambda x: tipmost_shared(x, nodes), heads)
         return sorted(ptips, key=lambda x: x.rev(), reverse=True)[0]
 
-    def status(self, base='.', head=None):
+    def status(self, base='.', head=None, files=None):
         '''Translate from the hg 6-tuple status format to a hash keyed
         on change-type'''
 
         states = ['modified', 'added', 'removed', 'deleted', 'unknown',
-              'ignored']
+                  'ignored']
+
+        match = self.matcher(files=files)
+        chngs = self.repo.status(base, head, match=match)
 
-        chngs = self.repo.status(base, head)
-        return dict(zip(states, chngs))
+        ret = {}
+        for paths, change in zip(chngs, states):
+            ret.update((f, change) for f in paths)
+        return ret
 
     def findoutgoing(self, parent):
         '''Return the base set of outgoing nodes.
@@ -568,12 +712,14 @@
 
     def modified(self):
         '''Return a list of files modified in the workspace'''
+
         wctx = self.workingctx()
         return sorted(wctx.files() + wctx.deleted()) or None
 
     def merged(self):
         '''Return boolean indicating whether the workspace has an uncommitted
         merge'''
+
         wctx = self.workingctx()
         return len(wctx.parents()) > 1
 
@@ -584,60 +730,65 @@
         wctx = self.workingctx()
         return wctx.branch() != wctx.parents()[0].branch()
 
-    def active(self, parent=None):
+    def active(self, parent=None, thorough=False):
         '''Return an ActiveList describing changes between workspace
         and parent workspace (including uncommitted changes).
-        If workspace has no parent, ActiveList will still describe any
-        uncommitted changes.'''
+        If the workspace has no parent, ActiveList will still describe any
+        uncommitted changes.
+
+        If thorough is True use neither the WorkList nor any cached
+        results (though the result of this call will be cached for
+        future, non-thorough, calls).'''
 
         parent = self.parent(parent)
-        if parent in self.activecache:
+
+        #
+        # Use the cached copy if we can (we have one, and weren't
+        # asked to be thorough)
+        #
+        if not thorough and parent in self.activecache:
             return self.activecache[parent]
 
+        #
+        # outbases: The set of outgoing nodes with no outgoing ancestors
+        # outnodes: The full set of outgoing nodes
+        #
         if parent:
-            outgoing = self.findoutgoing(parent)
-            outnodes = self.repo.changelog.nodesbetween(outgoing)[0]
-        else:
-            outgoing = []       # No parent, no outgoing nodes
+            outbases = self.findoutgoing(parent)
+            outnodes = self.repo.changelog.nodesbetween(outbases)[0]
+        else:               # No parent, no outgoing nodes
+            outbases = []
             outnodes = []
 
-        localtip = self._localtip(outnodes, self.workingctx())
+        wctx = self.workingctx(worklist=not thorough)
+        localtip = self._localtip(outnodes, wctx)
 
         if localtip.rev() is None:
             heads = localtip.parents()
         else:
             heads = [localtip]
 
+        parenttip = self.parenttip(heads, outnodes)
+
+        #
+        # If we couldn't find a parenttip, the two repositories must
+        # be unrelated (Hg catches most of this, but this case is
+        # valid for it but invalid for us)
+        #
+        if parenttip == None:
+            raise util.Abort('repository is unrelated')
+
+        headnodes = [h.node() for h in heads]
         ctxs = [self.repo.changectx(n) for n in
-                self.repo.changelog.nodesbetween(outgoing,
-                                                 [h.node() for h in heads])[0]]
+                self.repo.changelog.nodesbetween(outbases, headnodes)[0]]
 
         if localtip.rev() is None:
             ctxs.append(localtip)
 
-        act = ActiveList(self, self._parenttip(heads, outnodes), ctxs)
-
+        act = ActiveList(self, parenttip, ctxs)
         self.activecache[parent] = act
-        return act
-
-    def pdiff(self, pats, opts, parent=None):
-        'Return diffs relative to PARENT, as best as we can make out'
-
-        parent = self.parent(parent)
-        act = self.active(parent)
 
-        #
-        # act.localtip maybe nil, in the case of uncommitted local
-        # changes.
-        #
-        if not act.revs:
-            return
-
-        matchfunc = cmdutil.match(self.repo, pats, opts)
-        opts = patch.diffopts(self.ui, opts)
-
-        return self.diff(act.parenttip.node(), act.localtip.node(),
-                         match=matchfunc, opts=opts)
+        return act
 
     def squishdeltas(self, active, message, user=None):
         '''Create a single conglomerate changeset based on a given
@@ -722,9 +873,9 @@
         self.ui.pushbuffer()
 
         #
-        # Remove the active lists component changesets by stripping
-        # the base of any active branch (of which there may be
-        # several)
+        # Remove the previous child-local changes by stripping the
+        # nodes that form the base of the ActiveList (removing their
+        # children in the process).
         #
         try:
             try:
@@ -772,6 +923,7 @@
 
     def filepath(self, path):
         'Return the full path to a workspace file.'
+
         return self.repo.pathto(path)
 
     def clean(self, rev=None):
@@ -787,22 +939,60 @@
 
     def mq_applied(self):
         '''True if the workspace has Mq patches applied'''
+
         q = mq.queue(self.ui, self.repo.join(''))
         return q.applied
 
-    def workingctx(self):
-        return self.repo.changectx(None)
+    def workingctx(self, worklist=False):
+        '''Return a workingctx object representing the working copy.
+
+        If worklist is true, return a workingctx object created based
+        on the status of files in the workspace's worklist.'''
+
+        wl = WorkList(self)
+
+        if worklist and wl:
+            return context.workingctx(self.repo, changes=wl.status())
+        else:
+            return self.repo.changectx(None)
+
+    def matcher(self, pats=None, opts=None, files=None):
+        '''Return a match object suitable for Mercurial based on
+        specified criteria.
+
+        If files is specified it is a list of pathnames relative to
+        the repository root to be matched precisely.
+
+        If pats and/or opts are specified, these are as to
+        cmdutil.match'''
+
+        of_patterns = pats is not None or opts is not None
+        of_files = files is not None
+        opts = opts or {}       # must be a dict
+
+        assert not (of_patterns and of_files)
+
+        if of_patterns:
+            return cmdutil.match(self.repo, pats, opts)
+        elif of_files:
+            return cmdutil.matchfiles(self.repo, files)
+        else:
+            return cmdutil.matchall(self.repo)
 
     def diff(self, node1=None, node2=None, match=None, opts=None):
+        '''Return the diff of changes between two changesets as a string'''
+
+        #
+        # Retain compatibility by only calling diffopts() if it
+        # obviously has not already been done.
+        #
+        if isinstance(opts, dict):
+            opts = patch.diffopts(self.ui, opts)
+
         ret = cStringIO.StringIO()
-        try:
-            for chunk in patch.diff(self.repo, node1, node2, match=match,
-                                    opts=opts):
-                ret.write(chunk)
-        finally:
-            # Workaround Hg bug 1651
-            if not Version.at_least("1.3"):
-                self.repo.dirstate.invalidate()
+        for chunk in patch.diff(self.repo, node1, node2, match=match,
+                                opts=opts):
+            ret.write(chunk)
 
         return ret.getvalue()
 
--- a/usr/src/tools/onbld/hgext/cdm.py	Wed Feb 09 11:52:15 2011 -0800
+++ b/usr/src/tools/onbld/hgext/cdm.py	Fri Feb 11 13:58:20 2011 -0500
@@ -15,7 +15,7 @@
 
 #
 # Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright 2008, 2010 Richard Lowe
+# Copyright 2008, 2011 Richard Lowe
 #
 
 '''OpenSolaris extensions to Mercurial
@@ -34,11 +34,6 @@
 on the current branch that is also in the parent workspace to
 represent the parent workspace.
 
-    To provide a uniform notion of parent workspace regardless of
-filesystem-based access, Cadmium uses the highest numbered changeset
-on the current branch that is also in the parent workspace to
-represent the parent workspace.
-
 
 The Active List
 
@@ -104,9 +99,9 @@
 except Version.VersionMismatch, badversion:
     raise util.Abort("Version Mismatch:\n %s\n" % badversion)
 
-from mercurial import cmdutil, ignore, node
+from mercurial import cmdutil, ignore, node, patch
 
-from onbld.Scm.WorkSpace import ActiveEntry, WorkSpace
+from onbld.Scm.WorkSpace import WorkSpace, WorkList
 from onbld.Scm.Backup import CdmBackup
 from onbld.Checks import Cddl, Comments, Copyright, CStyle, HdrChk
 from onbld.Checks import JStyle, Keywords, Mapfile
@@ -242,9 +237,22 @@
     cdm').
     '''
 
-    parent = opts['parent']
+    act = wslist[repo].active(opts.get('parent'))
+    if not act.revs:
+        return
 
-    diffs = wslist[repo].pdiff(pats, opts, parent=parent)
+    #
+    # If no patterns were specified, either explicitly or via -I or -X
+    # use the active list files to avoid a workspace walk.
+    #
+    if pats or opts.get('include') or opts.get('exclude'):
+        matchfunc = wslist[repo].matcher(pats=pats, opts=opts)
+    else:
+        matchfunc = wslist[repo].matcher(files=act.files())
+
+    opts = patch.diffopts(ui, opts)
+    diffs = wslist[repo].diff(act.parenttip.node(), act.localtip.node(),
+                              match=matchfunc, opts=opts)
     if diffs:
         ui.write(diffs)
 
@@ -268,41 +276,30 @@
     and --removed.  By default, all files are shown.
     '''
 
-    wanted = []
+    act = wslist[repo].active(opts['parent'])
+    wanted = set(x for x in ('added', 'modified', 'removed') if opts[x])
+    changes = {}
 
-    if opts['added']:
-        wanted.append(ActiveEntry.ADDED)
-    if opts['modified']:
-        wanted.append(ActiveEntry.MODIFIED)
-    if opts['removed']:
-        wanted.append(ActiveEntry.REMOVED)
-
-    act = wslist[repo].active(opts['parent'])
-    chngmap = {ActiveEntry.MODIFIED: 'modified',
-               ActiveEntry.ADDED: 'added',
-               ActiveEntry.REMOVED: 'removed'}
-
-    lst = {}
     for entry in act:
         if wanted and (entry.change not in wanted):
             continue
 
-        chngstr = chngmap[entry.change]
-        if chngstr not in lst:
-            lst[chngstr] = []
-        lst[chngstr].append(entry)
+        if entry.change not in changes:
+            changes[entry.change] = []
+        changes[entry.change].append(entry)
+
+    for change in sorted(changes.keys()):
+        ui.write(change + ':\n')
 
-    for chng in sorted(lst.keys()):
-        ui.write(chng + ':\n')
-        for elt in sorted(lst[chng]):
-            if elt.is_renamed():
-                ui.write('\t%s (renamed from %s)\n' % (elt.name,
-                                                      elt.parentname))
-            elif elt.is_copied():
-                ui.write('\t%s (copied from %s)\n' % (elt.name,
-                                                      elt.parentname))
+        for entry in sorted(changes[change]):
+            if entry.is_renamed():
+                ui.write('\t%s (renamed from %s)\n' % (entry.name,
+                                                      entry.parentname))
+            elif entry.is_copied():
+                ui.write('\t%s (copied from %s)\n' % (entry.name,
+                                                      entry.parentname))
             else:
-                ui.write('\t%s\n' % elt.name)
+                ui.write('\t%s\n' % entry.name)
 
 
 def cdm_bugs(ui, repo, parent=None):
@@ -913,6 +910,11 @@
                 ui.warn('\t%d\n' % head)
             raise util.Abort('you must merge before recommitting')
 
+        #
+        # We can safely use the worklist here, as we know (from the
+        # abort_if_dirty() check above) that the working copy has not been
+        # modified.
+        #
         active = ws.active(parent)
 
         if filter(lambda b: len(b.parents()) > 1, active.bases()):
@@ -1264,6 +1266,120 @@
     return 0
 
 
+def cdm_debugcdmal(ui, repo, *pats, **opts):
+    '''dump the active list for the sake of debugging/testing'''
+
+    ui.write(wslist[repo].active(opts['parent']).as_text(pats))
+
+
+def cdm_changed(ui, repo, *pats, **opts):
+    '''mark a file as changed in the working copy
+
+    Maintain a list of files checked for modification in the working
+    copy.  If the list exists, most cadmium commands will only check
+    the working copy for changes to those files, rather than checking
+    the whole workspace (this does not apply to committed changes,
+    which are always seen).
+
+    Since this list functions only as a hint as to where in the
+    working copy to look for changes, entries that have not actually
+    been modified (in the working copy, or in general) are not
+    problematic.
+
+
+    Note: If such a list exists, it must be kept up-to-date.
+
+
+    Renamed files can be added with reference only to their new name:
+      $ hg mv foo bar
+      $ hg changed bar
+
+    Without arguments, 'hg changed' will list all files recorded as
+    altered, such that, for instance:
+      $ hg status $(hg changed)
+      $ hg diff $(hg changed)
+    Become useful (generally faster than their unadorned counterparts)
+
+    To create an initially empty list:
+      $ hg changed -i
+    Until files are added to the list it is equivalent to saying
+    "Nothing has been changed"
+
+    Update the list based on the current active list:
+      $ hg changed -u
+    The old list is emptied, and replaced with paths from the
+    current active list.
+
+    Remove the list entirely:
+      $ hg changed -d
+    '''
+
+    def modded_files(repo, parent):
+        out = wslist[repo].findoutgoing(wslist[repo].parent(parent))
+        outnodes = repo.changelog.nodesbetween(out)[0]
+
+        files = set()
+        for n in outnodes:
+            files.update(repo.changectx(n).files())
+
+        files.update(wslist[repo].status().keys())
+        return files
+
+    #
+    # specced_pats is convenient to treat as a boolean indicating
+    # whether any file patterns or paths were specified.
+    #
+    specced_pats = pats or opts['include'] or opts['exclude']
+    if len(filter(None, [opts['delete'], opts['update'], opts['init'],
+                         specced_pats])) > 1:
+        raise util.Abort("-d, -u, -i and patterns are mutually exclusive")
+
+    wl = WorkList(wslist[repo])
+
+    if (not wl and specced_pats) or opts['init']:
+        wl.delete()
+        if yes_no(ui, "Create a list based on your changes thus far?", True):
+            map(wl.add, modded_files(repo, opts.get('parent')))
+
+    if opts['delete']:
+        wl.delete()
+    elif opts['update']:
+        wl.delete()
+        map(wl.add, modded_files(repo, opts.get('parent')))
+        wl.write()
+    elif opts['init']:       # Any possible old list was deleted above
+        wl.write()
+    elif specced_pats:
+        sources = []
+
+        match = wslist[repo].matcher(pats=pats, opts=opts)
+        for abso in repo.walk(match):
+            if abso in repo.dirstate:
+                wl.add(abso)
+                #
+                # Store the source name of any copy.  We use this so
+                # both the add and delete of a rename can be entered
+                # into the WorkList with only the destination name
+                # explicitly being mentioned.
+                #
+                fctx = wslist[repo].workingctx().filectx(abso)
+                rn = fctx.renamed()
+                if rn:
+                    sources.append(rn[0])
+            else:
+                ui.warn("%s is not version controlled -- skipping\n" %
+                        match.rel(abso))
+
+        if sources:
+            for fname, chng in wslist[repo].status(files=sources).iteritems():
+                if chng == 'removed':
+                    wl.add(fname)
+        wl.write()
+    else:
+        for elt in sorted(wl.list()):
+            ui.write("%s\n" % wslist[repo].filepath(elt))
+
+
 cmdtable = {
     'apply': (cdm_apply, [('p', 'parent', '', 'parent workspace'),
                           ('r', 'remain', None, 'do not change directory')],
@@ -1277,6 +1393,18 @@
              'hg bugs [-p PARENT]'),
     'cddlchk': (cdm_cddlchk, [('p', 'parent', '', 'parent workspace')],
                 'hg cddlchk [-p PARENT]'),
+    'changed': (cdm_changed, [('d', 'delete', None, 'delete the file list'),
+                              ('u', 'update', None, 'mark all changed files'),
+                              ('i', 'init', None, 'create an empty file list'),
+                              ('p', 'parent', '', 'parent workspace'),
+                              ('I', 'include', [],
+                               'include names matching the given patterns'),
+                              ('X', 'exclude', [],
+                               'exclude names matching the given patterns')],
+                'hg changed -d\n'
+                'hg changed -u\n'
+                'hg changed -i\n'
+                'hg changed [-I PATTERN...] [-X PATTERN...] [FILE...]'),
     'comchk': (cdm_comchk, [('p', 'parent', '', 'parent workspace'),
                             ('N', 'nocheck', None,
                              'do not compare comments with databases')],
@@ -1287,6 +1415,8 @@
                   'hg copyright [-p PARENT]'),
     'cstyle': (cdm_cstyle, [('p', 'parent', '', 'parent workspace')],
                'hg cstyle [-p PARENT]'),
+    'debugcdmal': (cdm_debugcdmal, [('p', 'parent', '', 'parent workspace')],
+                   'hg debugcdmal [-p PARENT] [FILE...]'),
     'eval': (cdm_eval, [('p', 'parent', '', 'parent workspace'),
                         ('r', 'remain', None, 'do not change directory')],
              'hg eval [-p PARENT] [-r] command...'),