view mercurial/streamclone.py @ 2612:ffb895f16925

add support for streaming clone. existing clone code uses pull to get changes from remote repo. it is very slow, uses lots of memory and cpu. new clone code has server write file data straight to client, and client writes file data straight to disk. memory and cpu use are very low, and clone is much faster over lan. new client can still clone with pull, and can still clone from older servers. new server can still serve older clients.
author Vadim Gelfer <vadim.gelfer@gmail.com>
date Fri, 14 Jul 2006 11:17:22 -0700
parents
children 5a5852a417b1
line source

# streamclone.py - streaming clone server support for mercurial
#
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms
# of the GNU General Public License, incorporated herein by reference.

from demandload import demandload
from i18n import gettext as _
demandload(globals(), "os stat util")

# if server supports streaming clone, it advertises "stream"
# capability with value that is version+flags of repo it is serving.
# client only streams if it can read that repo format.
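
# An illustrative sketch, not Mercurial's actual negotiation code, of the
# check described above: the client parses the advertised version+flags
# value and agrees to stream only if it understands that revlog format.
# the bare-integer encoding and the helper name are assumptions made for
# this example.
def example_client_can_stream(advertised, supported_versions=(0, 1)):
    '''return True if a client that reads supported_versions may stream.'''
    try:
        version_and_flags = int(advertised)
    except (TypeError, ValueError):
        return False
    # low 16 bits carry the revlog version; higher bits are format flags.
    return (version_and_flags & 0xffff) in supported_versions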

def walkrepo(root):
    '''iterate over metadata files in repository.
    walk in natural (sorted) order.
    yields 2-tuples: name of .d or .i file, size of file.'''

    strip_count = len(root) + len(os.sep)
    def walk(path, recurse):
        ents = os.listdir(path)
        ents.sort()
        for e in ents:
            pe = os.path.join(path, e)
            st = os.lstat(pe)
            if stat.S_ISDIR(st.st_mode):
                if recurse:
                    for x in walk(pe, True):
                        yield x
            else:
                if not stat.S_ISREG(st.st_mode) or len(e) < 2:
                    continue
                sfx = e[-2:]
                if sfx in ('.d', '.i'):
                    yield pe[strip_count:], st.st_size
    # write file data first
    for x in walk(os.path.join(root, 'data'), True):
        yield x
    # write manifest before changelog
    meta = list(walk(root, False))
    meta.sort(reverse=True)
    for x in meta:
        yield x
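
# usage sketch (the path, names and sizes below are made up for
# illustration): stream_out() drives walkrepo() with repo.path, the
# repository's .hg directory.
#
#   for name, size in walkrepo('/path/to/repo/.hg'):
#       ...  # e.g. ('data/foo.i', 123), ..., ('00manifest.i', 456),
#            #      ('00changelog.i', 789)
#
# data/ revlogs come first, then the manifest, then the changelog last, so
# nothing the changelog references can be missing when the receiver writes
# the files out in this order.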

# stream file format is simple.
#
# server writes out line that says how many files, how many total
# bytes.  separator is ascii space, byte counts are strings.
#
# then for each file:
#
#   server writes out line that says file name, how many bytes in
#   file.  separator is ascii nul, byte count is string.
#
#   server writes out raw file data.
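
# An illustrative client-side reader for the format described above.  this
# is not the client Mercurial ships; the writefile callback and chunksize
# default are assumptions made for the example.
def example_stream_in(fileobj, writefile, chunksize=65536):
    '''read a stream produced by stream_out from fileobj.
    calls writefile(name, chunk) for every chunk of raw file data.'''
    total_files, total_bytes = fileobj.readline().split(' ', 1)
    for n in xrange(int(total_files)):
        name, size = fileobj.readline().split('\0', 1)
        remaining = int(size)
        while remaining > 0:
            chunk = fileobj.read(min(remaining, chunksize))
            if not chunk:
                raise ValueError('unexpected end of stream')
            remaining -= len(chunk)
            writefile(name, chunk)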

def stream_out(repo, fileobj):
    '''stream out all metadata files in repository.
    writes to file-like object, must support write() and optional flush().'''
    # get a consistent snapshot of the repo: hold the lock only during the
    # scan, so it is not needed while we stream and commits can happen in
    # the meantime.  revlogs are append-only, so sending just the bytes
    # counted here keeps the stream consistent.
    lock = repo.lock()
    try:
        repo.ui.debug('scanning\n')
        entries = []
        total_bytes = 0
        for name, size in walkrepo(repo.path):
            entries.append((name, size))
            total_bytes += size
    finally:
        lock.release()

    repo.ui.debug('%d files, %d bytes to transfer\n' %
                  (len(entries), total_bytes))
    fileobj.write('%d %d\n' % (len(entries), total_bytes))
    for name, size in entries:
        repo.ui.debug('sending %s (%d bytes)\n' % (name, size))
        fileobj.write('%s\0%d\n' % (name, size))
        for chunk in util.filechunkiter(repo.opener(name), limit=size):
            fileobj.write(chunk)
    flush = getattr(fileobj, 'flush', None)
    if flush:
        flush()
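
# usage sketch (illustrative only; not the actual hgweb or ssh server
# wiring): stream_out() needs a repository and any object with a write()
# method, so dumping a repository's stream into a local file looks like
# this.  the paths are hypothetical.
#
#   from mercurial import hg, ui
#   repo = hg.repository(ui.ui(), '/path/to/repo')
#   f = open('/tmp/stream.bin', 'wb')
#   stream_out(repo, f)
#   f.close()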