# HG changeset patch
# User Eric Hopper
# Date 1129082207 25200
# Node ID b6d9ea0bc1070f75648ac9b729682a7066968fca
# Parent be6b5ce60b7f7c4bfa22aa1f1084c4704c46f22b
Added a lot of comments to changegroupsubset.
diff -r be6b5ce60b7f -r b6d9ea0bc107 mercurial/localrepo.py
--- a/mercurial/localrepo.py Tue Oct 11 08:39:21 2005 -0700
+++ b/mercurial/localrepo.py Tue Oct 11 18:56:47 2005 -0700
@@ -896,37 +896,80 @@
return remote.addchangegroup(cg)
def changegroupsubset(self, bases, heads):
+ """This function generates a changegroup consisting of all the nodes
+ that are descendents of any of the bases, and ancestors of any of
+ the heads.
+
+ It is fairly complex as determining which filenodes and which
+ manifest nodes need to be included for the changeset to be complete
+ is non-trivial.
+
+ Another wrinkle is doing the reverse, figuring out which changeset in
+ the changegroup a particular filenode or manifestnode belongs to."""
+
+ # Set up some initial variables
+ # Make it easy to refer to self.changelog
cl = self.changelog
- # msng = missing
+ # msng is short for missing - compute the list of changesets in this
+ # changegroup.
msng_cl_lst, bases, heads = cl.nodesbetween(bases, heads)
- junk = None
+ # Some bases may turn out to be superfluous, and some heads may be
+ # too. nodesbetween will return the minimal set of bases and heads
+ # necessary to re-create the changegroup.
+
+ # Known heads are the list of heads that it is assumed the recipient
+ # of this changegroup will know about.
knownheads = {}
+ # We assume that all parents of bases are known heads.
for n in bases:
for p in cl.parents(n):
if p != nullid:
knownheads[p] = 1
knownheads = knownheads.keys()
if knownheads:
+ # Now that we know what heads are known, we can compute which
+ # changesets are known. The recipient must know about all
+ # changesets required to reach the known heads from the null
+ # changeset.
has_cl_set, junk, junk = cl.nodesbetween(None, knownheads)
+ junk = None
+ # Transform the list into an ersatz set.
has_cl_set = dict.fromkeys(has_cl_set)
else:
+ # If there were no known heads, the recipient cannot be assumed to
+ # know about any changesets.
has_cl_set = {}
+ # Make it easy to refer to self.manifest
mnfst = self.manifest
+ # We don't know which manifests are missing yet
msng_mnfst_set = {}
+ # Nor do we know which filenodes are missing.
msng_filenode_set = {}
junk = mnfst.index[mnfst.count() - 1] # Get around a bug in lazyindex
junk = None
+ # A changeset always belongs to itself, so the changenode lookup
+ # function for a changenode is identity.
def identity(x):
return x
+ # A function generating function. Sets up an environment for the
+ # inner function.
def cmp_by_rev_func(revlog):
- def cmpfunc(a, b):
+ # Compare two nodes by their revision number in the environment's
+ # revision history. Since the revision number both represents the
+ # most efficient order to read the nodes in, and represents a
+ # topological sorting of the nodes, this function is often useful.
+ def cmp_by_rev(a, b):
return cmp(revlog.rev(a), revlog.rev(b))
- return cmpfunc
+ return cmp_by_rev
+ # If we determine that a particular file or manifest node must be a
+ # node that the recipient of the changegroup will already have, we can
+ # also assume the recipient will have all the parents. This function
+ # prunes them from the set of missing nodes.
def prune_parents(revlog, hasset, msngset):
haslst = hasset.keys()
haslst.sort(cmp_by_rev_func(revlog))
@@ -941,7 +984,19 @@
for n in hasset:
msngset.pop(n, None)
+ # This is a function generating function used to set up an environment
+ # for the inner function to execute in.
def manifest_and_file_collector(changedfileset):
+ # This is an information gathering function that gathers
+ # information from each changeset node that goes out as part of
+ # the changegroup. The information gathered is a list of which
+ # manifest nodes are potentially required (the recipient may
+ # already have them) and total list of all files which were
+ # changed in any changeset in the changegroup.
+ #
+ # We also remember the first changenode we saw any manifest
+ # referenced by so we can later determine which changenode 'owns'
+ # the manifest.
def collect_manifests_and_files(clnode):
c = cl.read(clnode)
for f in c[3]:
@@ -951,93 +1006,163 @@
msng_mnfst_set.setdefault(c[0], clnode)
return collect_manifests_and_files
+ # Figure out which manifest nodes (of the ones we think might be part
+ # of the changegroup) the recipient must know about and remove them
+ # from the changegroup.
def prune_manifests():
has_mnfst_set = {}
for n in msng_mnfst_set:
+ # If a 'missing' manifest thinks it belongs to a changenode
+ # the recipient is assumed to have, obviously the recipient
+ # must have that manifest.
linknode = cl.node(mnfst.linkrev(n))
if linknode in has_cl_set:
has_mnfst_set[n] = 1
prune_parents(mnfst, has_mnfst_set, msng_mnfst_set)
+ # Use the information collected in collect_manifests_and_files to say
+ # which changenode any manifestnode belongs to.
def lookup_manifest_link(mnfstnode):
return msng_mnfst_set[mnfstnode]
+ # A function generating function that sets up the initial environment
+ # the inner function.
def filenode_collector(changedfiles):
next_rev = [0]
+ # This gathers information from each manifestnode included in the
+ # changegroup about which filenodes the manifest node references
+ # so we can include those in the changegroup too.
+ #
+ # It also remembers which changenode each filenode belongs to. It
+ # does this by assuming the a filenode belongs to the changenode
+ # the first manifest that references it belongs to.
def collect_msng_filenodes(mnfstnode):
r = mnfst.rev(mnfstnode)
if r == next_rev[0]:
# If the last rev we looked at was the one just previous,
# we only need to see a diff.
delta = mdiff.patchtext(mnfst.delta(mnfstnode))
+ # For each line in the delta
for dline in delta.splitlines():
+ # get the filename and filenode for that line
f, fnode = dline.split('\0')
fnode = bin(fnode[:40])
f = changedfiles.get(f, None)
+ # And if the file is in the list of files we care
+ # about.
if f is not None:
+ # Get the changenode this manifest belongs to
+ clnode = msng_mnfst_set[mnfstnode]
+ # Create the set of filenodes for the file if
+ # there isn't one already.
+ ndset = msng_filenode_set.setdefault(f, {})
+ # And set the filenode's changelog node to the
+ # manifest's if it hasn't been set already.
+ ndset.setdefault(fnode, clnode)
+ else:
+ # Otherwise we need a full manifest.
+ m = mnfst.read(mnfstnode)
+ # For every file in we care about.
+ for f in changedfiles:
+ fnode = m.get(f, None)
+ # If it's in the manifest
+ if fnode is not None:
+ # See comments above.
clnode = msng_mnfst_set[mnfstnode]
ndset = msng_filenode_set.setdefault(f, {})
ndset.setdefault(fnode, clnode)
- else:
- m = mnfst.read(mnfstnode)
- for f in changedfiles:
- fnode = m.get(f, None)
- if fnode is not None:
- clnode = msng_mnfst_set[mnfstnode]
- ndset = msng_filenode_set.setdefault(f, {})
- ndset.setdefault(fnode, clnode)
+ # Remember the revision we hope to see next.
next_rev[0] = r + 1
return collect_msng_filenodes
+ # We have a list of filenodes we think we need for a file, lets remove
+ # all those we now the recipient must have.
def prune_filenodes(f, filerevlog):
msngset = msng_filenode_set[f]
hasset = {}
+ # If a 'missing' filenode thinks it belongs to a changenode we
+ # assume the recipient must have, then the recipient must have
+ # that filenode.
for n in msngset:
clnode = cl.node(filerevlog.linkrev(n))
if clnode in has_cl_set:
hasset[n] = 1
prune_parents(filerevlog, hasset, msngset)
+ # A function generator function that sets up the a context for the
+ # inner function.
def lookup_filenode_link_func(fname):
msngset = msng_filenode_set[fname]
+ # Lookup the changenode the filenode belongs to.
def lookup_filenode_link(fnode):
return msngset[fnode]
return lookup_filenode_link
+ # Now that we have all theses utility functions to help out and
+ # logically divide up the task, generate the group.
def gengroup():
+ # The set of changed files starts empty.
changedfiles = {}
+ # Create a changenode group generator that will call our functions
+ # back to lookup the owning changenode and collect information.
group = cl.group(msng_cl_lst, identity,
manifest_and_file_collector(changedfiles))
for chnk in group:
yield chnk
+
+ # The list of manifests has been collected by the generator
+ # calling our functions back.
prune_manifests()
msng_mnfst_lst = msng_mnfst_set.keys()
+ # Sort the manifestnodes by revision number.
msng_mnfst_lst.sort(cmp_by_rev_func(mnfst))
+ # Create a generator for the manifestnodes that calls our lookup
+ # and data collection functions back.
group = mnfst.group(msng_mnfst_lst, lookup_manifest_link,
filenode_collector(changedfiles))
for chnk in group:
yield chnk
+
+ # These are no longer needed, dereference and toss the memory for
+ # them.
msng_mnfst_lst = None
msng_mnfst_set.clear()
+
changedfiles = changedfiles.keys()
changedfiles.sort()
+ # Go through all our files in order sorted by name.
for fname in changedfiles:
filerevlog = self.file(fname)
+ # Toss out the filenodes that the recipient isn't really
+ # missing.
prune_filenodes(fname, filerevlog)
msng_filenode_lst = msng_filenode_set[fname].keys()
+ # If any filenodes are left, generate the group for them,
+ # otherwise don't bother.
if len(msng_filenode_lst) > 0:
yield struct.pack(">l", len(fname) + 4) + fname
+ # Sort the filenodes by their revision #
msng_filenode_lst.sort(cmp_by_rev_func(filerevlog))
+ # Create a group generator and only pass in a changenode
+ # lookup function as we need to collect no information
+ # from filenodes.
group = filerevlog.group(msng_filenode_lst,
lookup_filenode_link_func(fname))
for chnk in group:
yield chnk
+ # Don't need this anymore, toss it to free memory.
del msng_filenode_set[fname]
+ # Signal that no more groups are left.
yield struct.pack(">l", 0)
return util.chunkbuffer(gengroup())
def changegroup(self, basenodes):
+ """Generate a changegroup of all nodes that we have that a recipient
+ doesn't.
+
+ This is much easier than the previous function as we can assume that
+ the recipient has any changenode we aren't sending them."""
cl = self.changelog
nodes = cl.nodesbetween(basenodes, None)[0]
revset = dict.fromkeys([cl.rev(n) for n in nodes])