comparison mercurial/hg.py @ 879:953ccddd57bd

dirstate walking optimizations: The repo walking code introduces a number of calls to dirstate.map.copy(), significantly slowing down the walk on large trees. When a list of files is passed to the walking code, we should only look at the map entries relevant to that file list. dirstate.filterfiles() is added to return a subset of the dirstate map. The subset includes the files passed in and, if one of the requested files is actually a directory, any files inside that directory tree. This brings the time for "hg diff Makefile" down from 1.7s to 0.3s on a Linux kernel repo. Also, the diff command was unconditionally calling makewalk, leading to an extra pass through repo.changes. This patch avoids the call to makewalk when commands.diff isn't given a list of patterns, cutting the time for "hg diff" (with no args) in half. Index: mine/mercurial/hg.py ===================================================================
author mason@suse.com
date Fri, 12 Aug 2005 07:10:21 -0800
parents c2e77581bc84
children 63ca8a68d59e
comparison
equal deleted inserted replaced
871:c2e77581bc84 879:953ccddd57bd
438 f = f + "\0" + c 438 f = f + "\0" + c
439 e = struct.pack(">cllll", e[0], e[1], e[2], e[3], len(f)) 439 e = struct.pack(">cllll", e[0], e[1], e[2], e[3], len(f))
440 st.write(e + f) 440 st.write(e + f)
441 self.dirty = 0 441 self.dirty = 0
442 442
def filterfiles(self, files):
    """Return the subset of the dirstate map relevant to ``files``.

    ``files`` is an iterable of path names.  The returned dict maps
    path -> dirstate entry and contains:

    * every name in ``files`` that is itself a key of ``self.map``;
    * for every name that is not a key, all map entries located below
      it when the name is treated as a directory (keys that start
      with ``name + '/'``).

    If any requested name is ``'.'`` a copy of the whole map is
    returned.  Names that match nothing contribute nothing.  The
    result is always a new dict; ``self.map`` is never modified.
    """
    ret = {}
    unknown = []

    for x in files:
        # Compare by value: two equal strings are not guaranteed to be
        # the same object, so an identity test could miss '.'.
        if x == '.':
            return self.map.copy()
        if x in self.map:
            ret[x] = self.map[x]
        else:
            unknown.append(x)

    if not unknown:
        return ret

    # Sorted key list so that each directory subtree is one contiguous run.
    b = list(self.map)
    b.sort()
    blen = len(b)

    for x in unknown:
        # Search for "x/" rather than "x": names such as "x.c" or "x-y"
        # sort between "x" and "x/...", and starting the scan at "x"
        # would stop on them before reaching the directory contents.
        prefix = x + '/'
        bs = bisect.bisect_left(b, prefix)
        while bs < blen and b[bs].startswith(prefix):
            s = b[bs]
            ret[s] = self.map[s]
            bs += 1
    return ret
475
476 def walk(self, files = None, match = util.always, dc=None):
444 self.read() 477 self.read()
445 dc = self.map.copy() 478
446 # walk all files by default 479 # walk all files by default
447 if not files: files = [self.root] 480 if not files:
481 files = [self.root]
482 if not dc:
483 dc = self.map.copy()
484 elif not dc:
485 dc = self.filterfiles(files)
486
448 known = {'.hg': 1} 487 known = {'.hg': 1}
449 def seen(fn): 488 def seen(fn):
450 if fn in known: return True 489 if fn in known: return True
451 known[fn] = 1 490 known[fn] = 1
452 def traverse(): 491 def traverse():
480 # not in .hgignore 519 # not in .hgignore
481 520
482 for src, fn in util.unique(traverse()): 521 for src, fn in util.unique(traverse()):
483 fn = os.path.normpath(fn) 522 fn = os.path.normpath(fn)
484 if seen(fn): continue 523 if seen(fn): continue
485 if fn in dc: 524 if fn not in dc and self.ignore(fn):
486 del dc[fn]
487 elif self.ignore(fn):
488 continue 525 continue
489 if match(fn): 526 if match(fn):
490 yield src, fn 527 yield src, fn
491 528
492 def changes(self, files = None, match = util.always): 529 def changes(self, files = None, match = util.always):
493 self.read() 530 self.read()
494 dc = self.map.copy() 531 if not files:
532 dc = self.map.copy()
533 else:
534 dc = self.filterfiles(files)
495 lookup, changed, added, unknown = [], [], [], [] 535 lookup, changed, added, unknown = [], [], [], []
496 536
497 for src, fn in self.walk(files, match): 537 for src, fn in self.walk(files, match, dc=dc):
498 try: s = os.stat(os.path.join(self.root, fn)) 538 try: s = os.stat(os.path.join(self.root, fn))
499 except: continue 539 except: continue
500 540
501 if fn in dc: 541 if fn in dc:
502 c = dc[fn] 542 c = dc[fn]