view src/lib-storage/index/mbox/mbox-lock.c @ 6857:41911abe6fa7 HEAD

NFS cache flushing updates.
author Timo Sirainen <tss@iki.fi>
date Sun, 25 Nov 2007 15:47:36 +0200
parents dbab5e592577
children 7ed926ed7aa4
line wrap: on
line source

/* Copyright (c) 2002-2007 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "nfs-workarounds.h"
#include "mail-index-private.h"
#include "mbox-storage.h"
#include "mbox-file.h"
#include "mbox-lock.h"

#include <time.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>

#ifdef HAVE_FLOCK
#  include <sys/file.h>
#endif

/* random delay of 100000 .. 199999 microseconds (0.1 .. 0.2 seconds),
   given to usleep() between lock retries */
#define LOCK_RANDOM_USLEEP_TIME (100000 + (unsigned int)rand() % 100000)

/* lock methods to use in wanted order, as space-separated names from
   lock_data[]. used when MBOX_READ_LOCKS / MBOX_WRITE_LOCKS aren't set
   in the environment. */
#define DEFAULT_READ_LOCK_METHODS "fcntl"
#define DEFAULT_WRITE_LOCK_METHODS "dotlock fcntl"
/* lock timeout in seconds. used when MBOX_LOCK_TIMEOUT isn't set. */
#define MBOX_DEFAULT_LOCK_TIMEOUT (5*60)
/* assume stale dotlock if mbox file hasn't changed for n seconds.
   used when MBOX_DOTLOCK_CHANGE_TIMEOUT isn't set. */
#define DEFAULT_DOTLOCK_CHANGE_TIMEOUT (120)

/* Identifiers of the supported mbox lock methods. The values are used as
   indexes to lock_data[] and to mbox_lock_context.lock_status[]. */
enum mbox_lock_type {
	MBOX_LOCK_DOTLOCK,
	MBOX_LOCK_DOTLOCK_TRY,
	MBOX_LOCK_FCNTL,
	MBOX_LOCK_FLOCK,
	MBOX_LOCK_LOCKF,

	/* number of lock methods above, not a lock method itself */
	MBOX_LOCK_COUNT
};

/* State shared by the lock method functions while one lock/unlock
   operation is being processed. */
struct mbox_lock_context {
	/* mailbox whose file is being locked */
	struct mbox_mailbox *mbox;
	/* indexed by enum mbox_lock_type: 1 = handled as locked,
	   0 = handled as unlocked. mbox_lock_list() skips the methods whose
	   status already matches the wanted state. */
	int lock_status[MBOX_LOCK_COUNT];
	/* set by mbox_file_open_latest() after the mbox fd has been
	   checked/opened, so the check is done only once per context */
	bool checked_file;

	/* lock type (F_RDLCK, F_WRLCK or F_UNLCK) given to the latest
	   mbox_lock_list() call */
	int lock_type;
	/* the "stale" value dotlock_callback() saw in its previous call */
	bool dotlock_last_stale;
	/* updated by mbox_lock_fcntl() when its fcntl() call succeeds */
	bool fcntl_locked;
};

/* One entry of the lock_data[] lock method table. */
struct mbox_lock_data {
	/* lock method identifier */
	enum mbox_lock_type type;
	/* name used in the lock method settings (compared case-insensitively
	   by mbox_read_lock_methods()) */
	const char *name;
	/* function that locks/unlocks using this method, or NULL if the
	   method wasn't compiled in. lock_type is F_RDLCK, F_WRLCK or
	   F_UNLCK. */
	int (*func)(struct mbox_lock_context *ctx, int lock_type,
		    time_t max_wait_time);
};

/* Lock method implementations referenced by lock_data[]. lock_type is
   F_RDLCK, F_WRLCK or F_UNLCK. max_wait_time is the time() value after
   which waiting for the lock is given up; 0 means to only try locking. */
static int mbox_lock_dotlock(struct mbox_lock_context *ctx, int lock_type,
			     time_t max_wait_time);
static int mbox_lock_dotlock_try(struct mbox_lock_context *ctx, int lock_type,
				 time_t max_wait_time);
static int mbox_lock_fcntl(struct mbox_lock_context *ctx, int lock_type,
			   time_t max_wait_time);
#ifdef HAVE_FLOCK
static int mbox_lock_flock(struct mbox_lock_context *ctx, int lock_type,
			   time_t max_wait_time);
#else
/* not compiled in: a NULL function makes mbox_read_lock_methods() reject
   the method */
#  define mbox_lock_flock NULL
#endif
#ifdef HAVE_LOCKF
static int mbox_lock_lockf(struct mbox_lock_context *ctx, int lock_type,
			   time_t max_wait_time);
#else
/* not compiled in: a NULL function makes mbox_read_lock_methods() reject
   the method */
#  define mbox_lock_lockf NULL
#endif

/* Table of all lock methods, terminated by an entry with NULL name.
   mbox_lock_list() indexes this table directly with enum mbox_lock_type,
   so the entries must stay in the same order as the enum values. */
struct mbox_lock_data lock_data[] = {
	{ MBOX_LOCK_DOTLOCK, "dotlock", mbox_lock_dotlock },
	{ MBOX_LOCK_DOTLOCK_TRY, "dotlock_try", mbox_lock_dotlock_try },
	{ MBOX_LOCK_FCNTL, "fcntl", mbox_lock_fcntl },
	{ MBOX_LOCK_FLOCK, "flock", mbox_lock_flock },
	{ MBOX_LOCK_LOCKF, "lockf", mbox_lock_lockf },
	{ 0, NULL, NULL }
};

/* TRUE after mbox_init_lock_settings() has filled the settings below */
static bool lock_settings_initialized = FALSE;
/* lock methods used for read/write locking in locking order. both lists
   are terminated with (enum mbox_lock_type)-1. */
static enum mbox_lock_type read_locks[MBOX_LOCK_COUNT+1];
static enum mbox_lock_type write_locks[MBOX_LOCK_COUNT+1];
/* lock_timeout is added to time() to get the locking deadline, so it's in
   seconds. dotlock_change_timeout is given to the dotlock code as its
   stale timeout. */
static int lock_timeout, dotlock_change_timeout;

/* forward declarations: dotlock_callback() calls mbox_lock_list() and
   mbox_update_locking() calls mbox_unlock_files() before their
   definitions */
static int mbox_lock_list(struct mbox_lock_context *ctx, int lock_type,
			  time_t max_wait_time, int idx);
static int mbox_unlock_files(struct mbox_lock_context *ctx);

/* Parse a space-separated list of lock method names in str and store the
   matching lock types to locks[] in the given order. The list is
   terminated with (enum mbox_lock_type)-1.

   env is the setting's name and it's used only in error messages. Unknown
   names, methods that weren't compiled in and duplicated names are
   reported with i_fatal(). */
static void mbox_read_lock_methods(const char *str, const char *env,
				   enum mbox_lock_type *locks)
{
	const char *const *names = t_strsplit(str, " ");
	enum mbox_lock_type type;
	int count = 0, n, j;

	for (n = 0; names[n] != NULL; n++) {
		/* find the name from the lock method table. if it isn't
		   found, type is left pointing to the terminating entry. */
		type = 0;
		while (lock_data[type].name != NULL) {
			if (strcasecmp(names[n], lock_data[type].name) == 0) {
				type = lock_data[type].type;
				break;
			}
			type++;
		}

		if (lock_data[type].name == NULL)
			i_fatal("%s: Invalid value %s", env, names[n]);
		if (lock_data[type].func == NULL) {
			i_fatal("%s: Support for lock type %s "
				"not compiled into binary", env, names[n]);
		}

		/* each method may be listed only once. this also keeps
		   count within the size of the destination array. */
		for (j = 0; j < count; j++) {
			if (locks[j] == type) {
				i_fatal("%s: Duplicated value %s",
					env, names[n]);
			}
		}

		/* @UNSAFE */
		locks[count] = type;
		count++;
	}
	locks[count] = (enum mbox_lock_type)-1;
}

static void mbox_init_lock_settings(void)
{
	const char *str;
	int r, w;

	str = getenv("MBOX_READ_LOCKS");
	if (str == NULL) str = DEFAULT_READ_LOCK_METHODS;
	mbox_read_lock_methods(str, "MBOX_READ_LOCKS", read_locks);

	str = getenv("MBOX_WRITE_LOCKS");
	if (str == NULL) str = DEFAULT_WRITE_LOCK_METHODS;
	mbox_read_lock_methods(str, "MBOX_WRITE_LOCKS", write_locks);

	/* check that read/write list orders match. write_locks must contain
	   at least read_locks and possibly more. */
	for (r = w = 0; write_locks[w] != (enum mbox_lock_type)-1; w++) {
		if (read_locks[r] == (enum mbox_lock_type)-1)
			break;
		if (read_locks[r] == write_locks[w])
			r++;
	}
	if (read_locks[r] != (enum mbox_lock_type)-1) {
		i_fatal("mbox read/write lock list settings are invalid. "
			"Lock ordering must be the same with both, "
			"and write locks must contain all read locks "
			"(and possibly more)");
	}

	str = getenv("MBOX_LOCK_TIMEOUT");
	lock_timeout = str == NULL ? MBOX_DEFAULT_LOCK_TIMEOUT : atoi(str);

	str = getenv("MBOX_DOTLOCK_CHANGE_TIMEOUT");
	dotlock_change_timeout = str == NULL ?
		DEFAULT_DOTLOCK_CHANGE_TIMEOUT : atoi(str);

        lock_settings_initialized = TRUE;
}

static int mbox_file_open_latest(struct mbox_lock_context *ctx, int lock_type)
{
	struct mbox_mailbox *mbox = ctx->mbox;
	struct stat st;

	if (ctx->checked_file || lock_type == F_UNLCK)
		return 0;

	if (mbox->mbox_fd != -1) {
		if ((mbox->storage->storage.flags &
		     MAIL_STORAGE_FLAG_NFS_FLUSH_STORAGE) != 0)
			nfs_flush_file_handle_cache(mbox->path);
		if (nfs_safe_stat(mbox->path, &st) < 0) {
			mbox_set_syscall_error(mbox, "stat()");
			return -1;
		}

		if (st.st_ino != mbox->mbox_ino ||
		    !CMP_DEV_T(st.st_dev, mbox->mbox_dev))
			mbox_file_close(mbox);
	}

	if (mbox->mbox_fd == -1) {
		if (mbox_file_open(mbox) < 0)
			return -1;
	}

	ctx->checked_file = TRUE;
	return 0;
}

/* Callback given to the dotlock code in dotlock_settings by
   mbox_lock_dotlock_int(). context is the struct mbox_lock_context.

   When the dotlock changes to being reported as stale (stale is set but it
   wasn't in the previous call), the lock methods following "dotlock" in
   the active lock list are tried without waiting. If they can't be
   acquired the mailbox is treated as being really locked and FALSE is
   returned, otherwise the just acquired locks are released again.

   In all other cases a lock notification is sent and TRUE is returned.
   NOTE(review): presumably returning FALSE tells the dotlock code not to
   override the stale dotlock - confirm against file-dotlock.h. */
static bool dotlock_callback(unsigned int secs_left, bool stale, void *context)
{
        struct mbox_lock_context *ctx = context;
	enum mbox_lock_type *lock_types;
	int i;

	if (stale && !ctx->dotlock_last_stale) {
		/* get next index we wish to try locking. it's the one after
		   dotlocking. */
		/* the list is selected with the same rule as in
		   mbox_lock_list() */
		lock_types = ctx->lock_type == F_WRLCK ||
			(ctx->lock_type == F_UNLCK &&
			 ctx->mbox->mbox_lock_type == F_WRLCK) ?
			write_locks : read_locks;

		for (i = 0; lock_types[i] != (enum mbox_lock_type)-1; i++) {
			if (lock_types[i] == MBOX_LOCK_DOTLOCK)
				break;
		}

		/* nothing to try if dotlock wasn't found from the list or
		   if it's the last method in it */
		if (lock_types[i] != (enum mbox_lock_type)-1 &&
		    lock_types[i+1] != (enum mbox_lock_type)-1) {
			i++;
			/* max_wait_time=0: only try, don't wait */
			if (mbox_lock_list(ctx, ctx->lock_type, 0, i) <= 0) {
				/* we couldn't get fd lock -
				   it's really locked */
				ctx->dotlock_last_stale = TRUE;
				return FALSE;
			}
			/* release the locks we just got. note that
			   mbox_lock_list() leaves ctx->lock_type as
			   F_UNLCK after this call. */
			(void)mbox_lock_list(ctx, F_UNLCK, 0, i);
		}
	}
	ctx->dotlock_last_stale = stale;

	index_storage_lock_notify(&ctx->mbox->ibox, stale ?
				  MAILBOX_LOCK_NOTIFY_MAILBOX_OVERRIDE :
				  MAILBOX_LOCK_NOTIFY_MAILBOX_ABORT,
				  secs_left);
	return TRUE;
}

/* Create or delete the mbox file's dotlock.

   lock_type F_UNLCK deletes the dotlock if we have one, any other value
   creates it unless we already have it. If try is TRUE, failing to create
   the dotlock because of missing disk space or permissions (EACCES) is
   ignored and the locking is treated as successful.

   Returns 1 if ok, 0 if creating the dotlock timed out, -1 on error. */
static int
mbox_lock_dotlock_int(struct mbox_lock_context *ctx, int lock_type, bool try)
{
	struct mbox_mailbox *mbox = ctx->mbox;
	struct dotlock_settings set;
	int ret;

	if (lock_type == F_UNLCK) {
		if (!mbox->mbox_dotlocked)
			return 1;

		/* unlocking is best-effort: a failure is recorded as the
		   storage error, but we still return success so that
		   mbox_lock_list() goes on to release the locks of the
		   remaining lock methods. */
		if (file_dotlock_delete(&mbox->mbox_dotlock) <= 0)
			mbox_set_syscall_error(mbox, "file_dotlock_delete()");
		mbox->mbox_dotlocked = FALSE;
		return 1;
	}

	if (mbox->mbox_dotlocked)
		return 1;

	/* NOTE(review): -1 is stored to a bool field. dotlock_callback() only
	   tests the field with '!', so this behaves like TRUE there and the
	   first "stale" callback doesn't try the other lock methods -
	   confirm that this is intended. */
	ctx->dotlock_last_stale = -1;

	memset(&set, 0, sizeof(set));
	set.use_excl_lock = (mbox->storage->storage.flags &
			     MAIL_STORAGE_FLAG_DOTLOCK_USE_EXCL) != 0;
	set.nfs_flush = (mbox->storage->storage.flags &
			 MAIL_STORAGE_FLAG_NFS_FLUSH_STORAGE) != 0;
	set.timeout = lock_timeout;
	set.stale_timeout = dotlock_change_timeout;
	set.callback = dotlock_callback;
	set.context = ctx;

	ret = file_dotlock_create(&set, mbox->path, 0, &mbox->mbox_dotlock);
	if (ret < 0) {
		if ((ENOSPACE(errno) || errno == EACCES) && try)
			return 1;

		/* name the function that actually failed */
		mbox_set_syscall_error(mbox, "file_dotlock_create()");
		return -1;
	}
	if (ret == 0) {
		mail_storage_set_error(&mbox->storage->storage,
			MAIL_ERROR_TEMP, MAIL_ERRSTR_LOCK_TIMEOUT);
		return 0;
	}
	mbox->mbox_dotlocked = TRUE;

	/* the mbox file may have been replaced while we were waiting for
	   the dotlock */
	if (mbox_file_open_latest(ctx, lock_type) < 0)
		return -1;
	return 1;
}

/* "dotlock" lock method: create/delete the dotlock, treating all dotlock
   creation failures as errors. max_wait_time isn't used, the dotlock code
   gets its timeout from lock_timeout. */
static int mbox_lock_dotlock(struct mbox_lock_context *ctx, int lock_type,
			     time_t max_wait_time ATTR_UNUSED)
{
	return mbox_lock_dotlock_int(ctx, lock_type, FALSE);
}

/* "dotlock_try" lock method: like "dotlock", but failing to create the
   dotlock because of missing disk space or permissions is ignored.
   max_wait_time isn't used. */
static int mbox_lock_dotlock_try(struct mbox_lock_context *ctx, int lock_type,
				 time_t max_wait_time ATTR_UNUSED)
{
	return mbox_lock_dotlock_int(ctx, lock_type, TRUE);
}

#ifdef HAVE_FLOCK
/* "flock" lock method: lock or unlock the mbox file with flock().

   The lock is polled with non-blocking flock() calls, sleeping a random
   0.1 .. 0.2 seconds between the attempts, until time() reaches
   max_wait_time. While waiting, a lock notification is sent at most once
   per second.

   Returns 1 if ok, 0 if the lock wasn't acquired before max_wait_time,
   -1 on error. */
static int mbox_lock_flock(struct mbox_lock_context *ctx, int lock_type,
			   time_t max_wait_time)
{
	time_t now, last_notify;

	if (mbox_file_open_latest(ctx, lock_type) < 0)
		return -1;

	/* file isn't open, so there's nothing to unlock */
	if (lock_type == F_UNLCK && ctx->mbox->mbox_fd == -1)
		return 1;

	/* convert the fcntl() style lock type to flock() operation */
	if (lock_type == F_WRLCK)
		lock_type = LOCK_EX;
	else if (lock_type == F_RDLCK)
		lock_type = LOCK_SH;
	else
		lock_type = LOCK_UN;

	last_notify = 0;
	while (flock(ctx->mbox->mbox_fd, lock_type | LOCK_NB) < 0) {
		if (errno != EWOULDBLOCK) {
			mbox_set_syscall_error(ctx->mbox, "flock()");
			return -1;
		}

		now = time(NULL);
		if (now >= max_wait_time)
			return 0;

		if (now != last_notify) {
			index_storage_lock_notify(&ctx->mbox->ibox,
				MAILBOX_LOCK_NOTIFY_MAILBOX_ABORT,
				max_wait_time - now);
			/* remember when we notified, so that the retries
			   within the same second don't notify again */
			last_notify = now;
		}

		usleep(LOCK_RANDOM_USLEEP_TIME);
	}

	return 1;
}
#endif

#ifdef HAVE_LOCKF
/* "lockf" lock method: lock or unlock the mbox file with lockf().

   Both F_RDLCK and F_WRLCK are locked with F_TLOCK. The lock is polled,
   sleeping a random 0.1 .. 0.2 seconds between the attempts, until time()
   reaches max_wait_time. While waiting, a lock notification is sent at
   most once per second.

   Returns 1 if ok, 0 if the lock wasn't acquired before max_wait_time,
   -1 on error. */
static int mbox_lock_lockf(struct mbox_lock_context *ctx, int lock_type,
			   time_t max_wait_time)
{
	time_t now, last_notify;

	if (mbox_file_open_latest(ctx, lock_type) < 0)
		return -1;

	/* file isn't open, so there's nothing to unlock */
	if (lock_type == F_UNLCK && ctx->mbox->mbox_fd == -1)
		return 1;

	/* convert the fcntl() style lock type to lockf() function */
	if (lock_type != F_UNLCK)
		lock_type = F_TLOCK;
	else
		lock_type = F_ULOCK;

	last_notify = 0;
	while (lockf(ctx->mbox->mbox_fd, lock_type, 0) < 0) {
		if (errno != EAGAIN) {
			mbox_set_syscall_error(ctx->mbox, "lockf()");
			return -1;
		}

		now = time(NULL);
		if (now >= max_wait_time)
			return 0;

		if (now != last_notify) {
			index_storage_lock_notify(&ctx->mbox->ibox,
				MAILBOX_LOCK_NOTIFY_MAILBOX_ABORT,
				max_wait_time - now);
			/* remember when we notified, so that the retries
			   within the same second don't notify again */
			last_notify = now;
		}

		usleep(LOCK_RANDOM_USLEEP_TIME);
	}

	return 1;
}
#endif

/* "fcntl" lock method: lock or unlock the whole mbox file with fcntl().

   If max_wait_time is 0 the lock is only tried (F_SETLK). Otherwise the
   lock is waited for with F_SETLKW, using alarm() to wake up at least once
   in every 5 seconds to send a lock notification and to check if
   max_wait_time has been reached. NOTE(review): this relies on SIGALRM
   interrupting fcntl() with EINTR - the signal handler isn't set up in
   this file.

   Returns 1 if ok, 0 if the lock wasn't acquired, -1 on error.
   ctx->fcntl_locked is updated only on success. */
static int mbox_lock_fcntl(struct mbox_lock_context *ctx, int lock_type,
			   time_t max_wait_time)
{
	struct flock fl;
	time_t now;
	unsigned int next_alarm;
	int wait_type;

	if (mbox_file_open_latest(ctx, lock_type) < 0)
		return -1;

	/* file isn't open, so there's nothing to unlock */
	if (lock_type == F_UNLCK && ctx->mbox->mbox_fd == -1)
		return 1;

	/* l_start=0 and l_len=0 cover the whole file */
	memset(&fl, 0, sizeof(fl));
	fl.l_type = lock_type;
	fl.l_whence = SEEK_SET;
	fl.l_start = 0;
	fl.l_len = 0;

	if (max_wait_time == 0) {
		/* usually we're waiting here, but if we came from
		   mbox_lock_dotlock(), we just want to try locking */
		wait_type = F_SETLK;
	} else {
		wait_type = F_SETLKW;
		now = time(NULL);
		if (now >= max_wait_time)
			alarm(1);
		else
			alarm(I_MIN(max_wait_time - now, 5));
	}

	while (fcntl(ctx->mbox->mbox_fd, wait_type, &fl) < 0) {
		if (errno != EINTR) {
			if ((errno == EACCES || errno == EAGAIN) &&
			    wait_type == F_SETLK) {
				/* non-blocking lock trying failed */
				return 0;
			}
			mbox_set_syscall_error(ctx->mbox, "fcntl()");
			alarm(0);
			return -1;
		}

		now = time(NULL);
		if (now >= max_wait_time) {
			alarm(0);
			return 0;
		}

		/* notify locks once every 5 seconds.
		   try to use rounded values. */
		next_alarm = (max_wait_time - now) % 5;
		if (next_alarm == 0)
			next_alarm = 5;
		alarm(next_alarm);

		index_storage_lock_notify(&ctx->mbox->ibox,
					  MAILBOX_LOCK_NOTIFY_MAILBOX_ABORT,
					  max_wait_time - now);
	}

	alarm(0);
	/* the file is fcntl-locked only if this wasn't an unlock request.
	   a successful F_UNLCK must not leave the flag claiming that we
	   hold the lock. */
	ctx->fcntl_locked = lock_type != F_UNLCK;
	return 1;
}

static int mbox_lock_list(struct mbox_lock_context *ctx, int lock_type,
			  time_t max_wait_time, int idx)
{
	enum mbox_lock_type *lock_types;
        enum mbox_lock_type type;
	int i, ret = 0, lock_status;

	ctx->lock_type = lock_type;

	lock_types = lock_type == F_WRLCK ||
		(lock_type == F_UNLCK && ctx->mbox->mbox_lock_type == F_WRLCK) ?
		write_locks : read_locks;
	for (i = idx; lock_types[i] != (enum mbox_lock_type)-1; i++) {
		type = lock_types[i];
		lock_status = lock_type != F_UNLCK;

		if (ctx->lock_status[type] == lock_status)
			continue;
		ctx->lock_status[type] = lock_status;

		ret = lock_data[type].func(ctx, lock_type, max_wait_time);
		if (ret <= 0)
			break;
	}
	return ret;
}

/* Change the mailbox's file locks to lock_type using the configured lock
   methods. The lock settings are initialized on the first call.

   If the mailbox is currently write locked, this is handled as dropping
   to a shared lock: the methods in the read lock list are first called
   with lock_type, and after that the methods found only from the write
   lock list are unlocked.

   Returns 1 if ok, or mbox_lock_list()'s return value if it failed:
   0 = lock timeout (also set as the storage error), -1 = error.
   *fcntl_locked_r is set to FALSE at start and on success it's replaced
   with the lock context's fcntl_locked value. */
static int mbox_update_locking(struct mbox_mailbox *mbox, int lock_type,
			       bool *fcntl_locked_r)
{
	struct mbox_lock_context ctx;
	time_t max_wait_time;
	int ret, i;
	bool drop_locks;

	*fcntl_locked_r = FALSE;

        index_storage_lock_notify_reset(&mbox->ibox);

	if (!lock_settings_initialized)
                mbox_init_lock_settings();

	if (mbox->mbox_fd == -1 && mbox->mbox_file_stream != NULL) {
		/* read-only mbox stream. no need to lock. */
		i_assert(mbox->mbox_readonly);
		mbox->mbox_lock_type = lock_type;
		return 1;
	}

	/* deadline for waiting the locks */
	max_wait_time = time(NULL) + lock_timeout;

	memset(&ctx, 0, sizeof(ctx));
	ctx.mbox = mbox;

	if (mbox->mbox_lock_type == F_WRLCK) {
		/* dropping to shared lock. first drop those that we
		   don't remove completely. */
		/* only the read lock methods are marked as unlocked, so
		   mbox_lock_list() below calls each of them with the new
		   lock type */
		for (i = 0; i < MBOX_LOCK_COUNT; i++)
			ctx.lock_status[i] = 1;
		for (i = 0; read_locks[i] != (enum mbox_lock_type)-1; i++)
			ctx.lock_status[read_locks[i]] = 0;
		drop_locks = TRUE;
	} else {
		drop_locks = FALSE;
	}

	/* must be set before mbox_lock_list(): it and dotlock_callback()
	   read this field to select the lock list when unlocking */
	mbox->mbox_lock_type = lock_type;
	ret = mbox_lock_list(&ctx, lock_type, max_wait_time, 0);
	if (ret <= 0) {
		/* release whatever we managed to lock. when dropping to
		   shared lock nothing is unlocked here. */
		if (!drop_locks)
			(void)mbox_unlock_files(&ctx);
		if (ret == 0) {
			mail_storage_set_error(&mbox->storage->storage,
				MAIL_ERROR_TEMP, MAIL_ERRSTR_LOCK_TIMEOUT);
		}
		return ret;
	}

	if (drop_locks) {
		/* dropping to shared lock: drop the locks that are only
		   in write list */
		memset(ctx.lock_status, 0, sizeof(ctx.lock_status));
		for (i = 0; write_locks[i] != (enum mbox_lock_type)-1; i++)
			ctx.lock_status[write_locks[i]] = 1;
		for (i = 0; read_locks[i] != (enum mbox_lock_type)-1; i++)
			ctx.lock_status[read_locks[i]] = 0;

		/* temporarily pretend to be write locked, so that
		   mbox_lock_list() goes through the write lock list */
		mbox->mbox_lock_type = F_WRLCK;
		(void)mbox_lock_list(&ctx, F_UNLCK, 0, 0);
		mbox->mbox_lock_type = F_RDLCK;
	}

	*fcntl_locked_r = ctx.fcntl_locked;
	return 1;
}

/* Lock the mbox with a shared (F_RDLCK) or an exclusive (F_WRLCK) lock.

   The file locks are taken only if the mailbox isn't locked yet,
   otherwise only the lock's reference count is increased. The lock id to
   be given to mbox_unlock() is returned in lock_id_r; its lowest bit is
   set for exclusive locks.

   Returns 1 if ok, 0 if locking timed out, -1 on error. */
int mbox_lock(struct mbox_mailbox *mbox, int lock_type,
	      unsigned int *lock_id_r)
{
	bool fcntl_locked, nfs_flush;
	int ret;

	/* only unlocked -> shared/exclusive and exclusive -> shared
	   changes are allowed */
	i_assert(lock_type == F_RDLCK || lock_type == F_WRLCK);
	i_assert(lock_type == F_RDLCK || mbox->mbox_lock_type != F_RDLCK);

	/* the index must not be locked before the mbox */
	i_assert(mbox->ibox.index->lock_type != F_WRLCK);

	if (mbox->mbox_lock_type == F_UNLCK) {
		ret = mbox_update_locking(mbox, lock_type, &fcntl_locked);
		if (ret <= 0)
			return ret;

		nfs_flush = (mbox->storage->storage.flags &
			     MAIL_STORAGE_FLAG_NFS_FLUSH_STORAGE) != 0;
		if (nfs_flush && fcntl_locked) {
			nfs_flush_attr_cache_fd_locked(mbox->path,
						       mbox->mbox_fd);
			nfs_flush_read_cache_locked(mbox->path,
						    mbox->mbox_fd);
		} else if (nfs_flush) {
			nfs_flush_attr_cache_unlocked(mbox->path);
			nfs_flush_read_cache_unlocked(mbox->path,
						      mbox->mbox_fd);
		}

		/* freshly locked, so begin a new lock id */
		mbox->mbox_lock_id += 2;
	}

	if (lock_type != F_RDLCK) {
		mbox->mbox_excl_locks++;
		*lock_id_r = mbox->mbox_lock_id + 1;
	} else {
		mbox->mbox_shared_locks++;
		*lock_id_r = mbox->mbox_lock_id;
	}
	return 1;
}

static int mbox_unlock_files(struct mbox_lock_context *ctx)
{
	int ret = 0;

	if (mbox_lock_list(ctx, F_UNLCK, 0, 0) < 0)
		ret = -1;

	ctx->mbox->mbox_lock_id += 2;
	ctx->mbox->mbox_lock_type = F_UNLCK;
	return ret;
}

/* Release a lock returned by mbox_lock(). The lowest bit of lock_id tells
   if the lock is an exclusive (1) or a shared (0) lock.

   The file locks are touched only when a lock's last reference is
   released: if the last exclusive lock is released while there are still
   shared locks, the locking is dropped to a shared lock. When no locks
   are left, the files are unlocked.

   Returns 0 if ok (or if dropping to the shared lock timed out),
   -1 on error. */
int mbox_unlock(struct mbox_mailbox *mbox, unsigned int lock_id)
{
	struct mbox_lock_context ctx;
	bool fcntl_locked;
	int type;

	i_assert(mbox->mbox_lock_id == (lock_id & ~1));

	if ((lock_id & 1) == 0) {
		/* releasing a shared lock */
		i_assert(mbox->mbox_shared_locks > 0);
		mbox->mbox_shared_locks--;
		if (mbox->mbox_shared_locks > 0 || mbox->mbox_excl_locks > 0)
			return 0;
	} else {
		/* releasing an exclusive lock */
		i_assert(mbox->mbox_excl_locks > 0);
		mbox->mbox_excl_locks--;
		if (mbox->mbox_excl_locks > 0)
			return 0;
		if (mbox->mbox_shared_locks > 0) {
			/* shared locks still exist, keep the files
			   locked with them */
			return mbox_update_locking(mbox, F_RDLCK,
						   &fcntl_locked) < 0 ? -1 : 0;
		}
	}

	/* no locks are left. mark all the methods as locked, so that
	   unlocking is attempted for each method in the lock list. */
	memset(&ctx, 0, sizeof(ctx));
	ctx.mbox = mbox;
	for (type = 0; type < MBOX_LOCK_COUNT; type++)
		ctx.lock_status[type] = 1;

	return mbox_unlock_files(&ctx);
}