# HG changeset patch # User mason@suse.com # Date 1123869476 28800 # Node ID 9a0af739cf55634eff94f2fb84d5bf322bf283db # Parent 6390c377a9e617e01061d33f3729948d72b6f54f dirstate walking optimizations The repo walking code introduces a number of calls to dirstate.map.copy(), significantly slowing down the walk on large trees. When a list of files is passed to the walking code, we should only look at map entries relevant to the file list passed in. dirstate.filterfiles() is added to return a subset of the dirstate map. The subset includes the files passed in, and if one of the files requested is actually a directory, it includes any files inside that directory tree. This brings the time for hg diff Makefile down from 1.7s to .3s on a linux kernel repo. Also, the diff command was unconditionally calling makewalk, leading to an extra pass through repo.changes. This patch avoids the call to makewalk when commands.diff isn't given a list of patterns, cutting the time for hg diff (with no args) in half. 
Index: mine/mercurial/hg.py =================================================================== diff -r 6390c377a9e6 -r 9a0af739cf55 mercurial/commands.py --- a/mercurial/commands.py Tue Aug 09 09:36:34 2005 -0800 +++ b/mercurial/commands.py Fri Aug 12 09:57:56 2005 -0800 @@ -634,9 +634,11 @@ raise Abort("too many revisions to diff") files = [] - roots, match, results = makewalk(repo, pats, opts) - for src, abs, rel in results: - files.append(abs) + match = util.always + if pats: + roots, match, results = makewalk(repo, pats, opts) + for src, abs, rel in results: + files.append(abs) dodiff(sys.stdout, ui, repo, files, *revs, **{'match': match}) def doexport(ui, repo, changeset, seqno, total, revwidth, opts): diff -r 6390c377a9e6 -r 9a0af739cf55 mercurial/hg.py --- a/mercurial/hg.py Tue Aug 09 09:36:34 2005 -0800 +++ b/mercurial/hg.py Fri Aug 12 09:57:56 2005 -0800 @@ -435,11 +435,50 @@ st.write(e + f) self.dirty = 0 - def walk(self, files = None, match = util.always): + def filterfiles(self, files): + ret = {} + unknown = [] + + for x in files: + if x is '.': + return self.map.copy() + if x not in self.map: + unknown.append(x) + else: + ret[x] = self.map[x] + + if not unknown: + return ret + + b = self.map.keys() + b.sort() + blen = len(b) + + for x in unknown: + bs = bisect.bisect(b, x) + if bs != 0 and b[bs-1] == x: + ret[x] = self.map[x] + continue + while bs < blen: + s = b[bs] + if len(s) > len(x) and s.startswith(x) and s[len(x)] == '/': + ret[s] = self.map[s] + else: + break + bs += 1 + return ret + + def walk(self, files = None, match = util.always, dc=None): self.read() - dc = self.map.copy() + # walk all files by default - if not files: files = [self.root] + if not files: + files = [self.root] + if not dc: + dc = self.map.copy() + elif not dc: + dc = self.filterfiles(files) + known = {'.hg': 1} def seen(fn): if fn in known: return True @@ -477,19 +516,20 @@ for src, fn in util.unique(traverse()): fn = os.path.normpath(fn) if seen(fn): continue - if fn in 
dc: - del dc[fn] - elif self.ignore(fn): + if fn not in dc and self.ignore(fn): continue if match(fn): yield src, fn def changes(self, files = None, match = util.always): self.read() - dc = self.map.copy() + if not files: + dc = self.map.copy() + else: + dc = self.filterfiles(files) lookup, changed, added, unknown = [], [], [], [] - for src, fn in self.walk(files, match): + for src, fn in self.walk(files, match, dc=dc): try: s = os.stat(os.path.join(self.root, fn)) except: continue