view src/lib-storage/index/mbox/mbox-lock.c @ 7316:258477a3f1e7 HEAD

Don't try to flush NFS file handle cache for mbox files.
author Timo Sirainen <tss@iki.fi>
date Sun, 02 Mar 2008 06:34:34 +0200
parents 7ed926ed7aa4
children af998ae4254b
line wrap: on
line source

/* Copyright (c) 2002-2008 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "nfs-workarounds.h"
#include "mail-index-private.h"
#include "mbox-storage.h"
#include "mbox-file.h"
#include "mbox-lock.h"

#include <time.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>

#ifdef HAVE_FLOCK
#  include <sys/file.h>
#endif

/* Sleep time between non-blocking lock retries, given to usleep():
   100000 .. 199999 microseconds, i.e. 0.1 .. 0.2 seconds */
#define LOCK_RANDOM_USLEEP_TIME (100000 + (unsigned int)rand() % 100000)

/* lock methods to use in wanted order */
#define DEFAULT_READ_LOCK_METHODS "fcntl"
#define DEFAULT_WRITE_LOCK_METHODS "dotlock fcntl"
/* lock timeout in seconds (it's added to time() to get the wait deadline) */
#define MBOX_DEFAULT_LOCK_TIMEOUT (5*60)
/* assume stale dotlock if mbox file hasn't changed for n seconds */
#define DEFAULT_DOTLOCK_CHANGE_TIMEOUT (120)

/* Supported mbox lock methods. The values are also used as indexes to
   lock_data[] and to mbox_lock_context.lock_status[]. */
enum mbox_lock_type {
	MBOX_LOCK_DOTLOCK,
	MBOX_LOCK_DOTLOCK_TRY,
	MBOX_LOCK_FCNTL,
	MBOX_LOCK_FLOCK,
	MBOX_LOCK_LOCKF,

	/* number of lock methods, not a lock method itself */
	MBOX_LOCK_COUNT
};

/* State of one lock/unlock operation going through the lock method list. */
struct mbox_lock_context {
	struct mbox_mailbox *mbox;
	/* per lock method: 1 = treated as locked, 0 = treated as unlocked.
	   mbox_lock_list() skips the methods that are already in the
	   wanted state. */
	int lock_status[MBOX_LOCK_COUNT];
	/* TRUE after mbox_file_open_latest() has checked/opened the file */
	bool checked_file;

	/* lock type given to the latest mbox_lock_list() call */
	int lock_type;
	/* "stale" state remembered between dotlock_callback() calls.
	   set to -1 before a dotlock is created. */
	bool dotlock_last_stale;
	/* set by mbox_lock_fcntl() after a successful fcntl() call */
	bool fcntl_locked;
};

/* Describes one lock method: its enum value, the name used in the lock
   method list settings and the function implementing it. */
struct mbox_lock_data {
	enum mbox_lock_type type;
	const char *name;
	/* lock_type is F_RDLCK, F_WRLCK or F_UNLCK. NULL if support for the
	   method isn't compiled in. */
	int (*func)(struct mbox_lock_context *ctx, int lock_type,
		    time_t max_wait_time);
};

/* Lock method handlers referenced by lock_data[]. The flock and lockf
   handlers are replaced with NULL when HAVE_FLOCK / HAVE_LOCKF isn't
   defined. */
static int mbox_lock_dotlock(struct mbox_lock_context *ctx, int lock_type,
			     time_t max_wait_time);
static int mbox_lock_dotlock_try(struct mbox_lock_context *ctx, int lock_type,
				 time_t max_wait_time);
static int mbox_lock_fcntl(struct mbox_lock_context *ctx, int lock_type,
			   time_t max_wait_time);
#ifdef HAVE_FLOCK
static int mbox_lock_flock(struct mbox_lock_context *ctx, int lock_type,
			   time_t max_wait_time);
#else
#  define mbox_lock_flock NULL
#endif
#ifdef HAVE_LOCKF
static int mbox_lock_lockf(struct mbox_lock_context *ctx, int lock_type,
			   time_t max_wait_time);
#else
#  define mbox_lock_lockf NULL
#endif

/* Lock method table. The entries are in enum mbox_lock_type order, so the
   table can be indexed with the enum value. Terminated by a NULL name. */
struct mbox_lock_data lock_data[] = {
	{ MBOX_LOCK_DOTLOCK, "dotlock", mbox_lock_dotlock },
	{ MBOX_LOCK_DOTLOCK_TRY, "dotlock_try", mbox_lock_dotlock_try },
	{ MBOX_LOCK_FCNTL, "fcntl", mbox_lock_fcntl },
	{ MBOX_LOCK_FLOCK, "flock", mbox_lock_flock },
	{ MBOX_LOCK_LOCKF, "lockf", mbox_lock_lockf },
	{ 0, NULL, NULL }
};

/* Lock settings, filled by mbox_init_lock_settings() on first use. */
static bool lock_settings_initialized = FALSE;
/* lock methods in locking order, terminated by (enum mbox_lock_type)-1 */
static enum mbox_lock_type read_locks[MBOX_LOCK_COUNT+1];
static enum mbox_lock_type write_locks[MBOX_LOCK_COUNT+1];
/* both are in seconds */
static int lock_timeout, dotlock_change_timeout;

static int mbox_lock_list(struct mbox_lock_context *ctx, int lock_type,
			  time_t max_wait_time, int idx);
static int mbox_unlock_files(struct mbox_lock_context *ctx);

/* Parse the space separated lock method names in str into locks[] and
   terminate the list with (enum mbox_lock_type)-1. env is the name of the
   setting and it's used only in the error messages. Unknown, not compiled
   in and duplicated method names are reported with i_fatal(). */
static void mbox_read_lock_methods(const char *str, const char *env,
				   enum mbox_lock_type *locks)
{
	const char *const *name;
	enum mbox_lock_type type;
	int count = 0, j;

	name = t_strsplit(str, " ");
	while (*name != NULL) {
		/* find the method from the table. if nothing matches, type
		   is left pointing to the terminating entry. */
		type = 0;
		while (lock_data[type].name != NULL) {
			if (strcasecmp(*name, lock_data[type].name) == 0) {
				type = lock_data[type].type;
				break;
			}
			type++;
		}
		if (lock_data[type].name == NULL)
			i_fatal("%s: Invalid value %s", env, *name);
		if (lock_data[type].func == NULL) {
			i_fatal("%s: Support for lock type %s "
				"not compiled into binary", env, *name);
		}

		/* each method may be listed only once */
		for (j = 0; j < count; j++) {
			if (locks[j] == type)
				i_fatal("%s: Duplicated value %s", env, *name);
		}

		/* @UNSAFE */
		locks[count] = type;
		count++;
		name++;
	}
	locks[count] = (enum mbox_lock_type)-1;
}

/* Read the lock settings from the environment (MBOX_READ_LOCKS,
   MBOX_WRITE_LOCKS, MBOX_LOCK_TIMEOUT, MBOX_DOTLOCK_CHANGE_TIMEOUT), using
   the built-in defaults for the unset ones. Dies with i_fatal() if the
   read lock list isn't an ordered subset of the write lock list. */
static void mbox_init_lock_settings(void)
{
	const char *value;
	int rd = 0, wr = 0;

	value = getenv("MBOX_READ_LOCKS");
	if (value == NULL)
		value = DEFAULT_READ_LOCK_METHODS;
	mbox_read_lock_methods(value, "MBOX_READ_LOCKS", read_locks);

	value = getenv("MBOX_WRITE_LOCKS");
	if (value == NULL)
		value = DEFAULT_WRITE_LOCK_METHODS;
	mbox_read_lock_methods(value, "MBOX_WRITE_LOCKS", write_locks);

	/* walk through the write locks and advance in the read locks
	   whenever the same method is seen. if there are read locks left
	   afterwards, they were missing from the write locks or they were
	   in a different order. */
	while (write_locks[wr] != (enum mbox_lock_type)-1 &&
	       read_locks[rd] != (enum mbox_lock_type)-1) {
		if (read_locks[rd] == write_locks[wr])
			rd++;
		wr++;
	}
	if (read_locks[rd] != (enum mbox_lock_type)-1) {
		i_fatal("mbox read/write lock list settings are invalid. "
			"Lock ordering must be the same with both, "
			"and write locks must contain all read locks "
			"(and possibly more)");
	}

	value = getenv("MBOX_LOCK_TIMEOUT");
	if (value != NULL)
		lock_timeout = atoi(value);
	else
		lock_timeout = MBOX_DEFAULT_LOCK_TIMEOUT;

	value = getenv("MBOX_DOTLOCK_CHANGE_TIMEOUT");
	if (value != NULL)
		dotlock_change_timeout = atoi(value);
	else
		dotlock_change_timeout = DEFAULT_DOTLOCK_CHANGE_TIMEOUT;

	lock_settings_initialized = TRUE;
}

static int mbox_file_open_latest(struct mbox_lock_context *ctx, int lock_type)
{
	struct mbox_mailbox *mbox = ctx->mbox;
	struct stat st;

	if (ctx->checked_file || lock_type == F_UNLCK)
		return 0;

	if (mbox->mbox_fd != -1) {
		/* we could flush NFS file handle cache here if we wanted to
		   be sure that the file is latest, but mbox files get rarely
		   deleted and the flushing might cause errors (e.g. EBUSY for
		   trying to flush a /var/mail mountpoint) */
		if (nfs_safe_stat(mbox->path, &st) < 0) {
			mbox_set_syscall_error(mbox, "stat()");
			return -1;
		}

		if (st.st_ino != mbox->mbox_ino ||
		    !CMP_DEV_T(st.st_dev, mbox->mbox_dev))
			mbox_file_close(mbox);
	}

	if (mbox->mbox_fd == -1) {
		if (mbox_file_open(mbox) < 0)
			return -1;
	}

	ctx->checked_file = TRUE;
	return 0;
}

/* Callback given to file_dotlock_create() in dotlock_settings.callback,
   with the mbox_lock_context as its context.

   When the dotlock is reported stale and dotlock_last_stale is zero, the
   lock methods following "dotlock" in the active lock list are tried
   without waiting (max_wait_time=0) and then released. If they can't be
   locked, FALSE is returned and the stale state is remembered so the check
   isn't repeated by the next call. Otherwise the storage is notified about
   the lock wait and TRUE is returned.

   NOTE(review): presumably returning FALSE tells the dotlock code not to
   override the stale dotlock - confirm against the file-dotlock API. */
static bool dotlock_callback(unsigned int secs_left, bool stale, void *context)
{
        struct mbox_lock_context *ctx = context;
	enum mbox_lock_type *lock_types;
	int i;

	if (stale && !ctx->dotlock_last_stale) {
		/* get next index we wish to try locking. it's the one after
		   dotlocking. */
		lock_types = ctx->lock_type == F_WRLCK ||
			(ctx->lock_type == F_UNLCK &&
			 ctx->mbox->mbox_lock_type == F_WRLCK) ?
			write_locks : read_locks;

		/* find "dotlock" from the list. MBOX_LOCK_DOTLOCK_TRY isn't
		   matched here. */
		for (i = 0; lock_types[i] != (enum mbox_lock_type)-1; i++) {
			if (lock_types[i] == MBOX_LOCK_DOTLOCK)
				break;
		}

		/* do the check only if there is some lock method after the
		   dotlock */
		if (lock_types[i] != (enum mbox_lock_type)-1 &&
		    lock_types[i+1] != (enum mbox_lock_type)-1) {
			i++;
			if (mbox_lock_list(ctx, ctx->lock_type, 0, i) <= 0) {
				/* we couldn't get fd lock -
				   it's really locked */
				ctx->dotlock_last_stale = TRUE;
				return FALSE;
			}
			/* NOTE(review): mbox_lock_list() stores its lock_type
			   parameter to ctx->lock_type, so after this call
			   ctx->lock_type is F_UNLCK */
			(void)mbox_lock_list(ctx, F_UNLCK, 0, i);
		}
	}
	ctx->dotlock_last_stale = stale;

	index_storage_lock_notify(&ctx->mbox->ibox, stale ?
				  MAILBOX_LOCK_NOTIFY_MAILBOX_OVERRIDE :
				  MAILBOX_LOCK_NOTIFY_MAILBOX_ABORT,
				  secs_left);
	return TRUE;
}

/* Create or delete the dotlock of the mbox file.

   lock_type: F_UNLCK deletes the dotlock if we have one. Any other value
   creates the dotlock, unless we already have it.
   try: if TRUE, failing to create the dotlock with EACCES or with an errno
   matching ENOSPACE() isn't an error; 1 is returned without having the
   dotlock.

   Returns 1 if ok, 0 if creating the dotlock timed out, -1 if error.
   Unlocking always returns 1: a failed file_dotlock_delete() is recorded
   with mbox_set_syscall_error(), but mbox_lock_list() stops at the first
   non-positive return value, which would leave the rest of the lock methods
   locked. */
static int
mbox_lock_dotlock_int(struct mbox_lock_context *ctx, int lock_type, bool try)
{
	struct mbox_mailbox *mbox = ctx->mbox;
	struct dotlock_settings set;
	int ret;

	if (lock_type == F_UNLCK) {
		if (!mbox->mbox_dotlocked)
			return 1;

		if (file_dotlock_delete(&mbox->mbox_dotlock) <= 0)
			mbox_set_syscall_error(mbox, "file_dotlock_delete()");
		mbox->mbox_dotlocked = FALSE;
		return 1;
	}

	if (mbox->mbox_dotlocked)
		return 1;

	/* a non-zero value makes the first dotlock_callback() call skip
	   its check of the other lock methods */
	ctx->dotlock_last_stale = -1;

	memset(&set, 0, sizeof(set));
	set.use_excl_lock = (mbox->storage->storage.flags &
			     MAIL_STORAGE_FLAG_DOTLOCK_USE_EXCL) != 0;
	set.nfs_flush = (mbox->storage->storage.flags &
			 MAIL_STORAGE_FLAG_NFS_FLUSH_STORAGE) != 0;
	set.timeout = lock_timeout;
	set.stale_timeout = dotlock_change_timeout;
	set.callback = dotlock_callback;
	set.context = ctx;

	ret = file_dotlock_create(&set, mbox->path, 0, &mbox->mbox_dotlock);
	if (ret < 0) {
		if ((ENOSPACE(errno) || errno == EACCES) && try)
			return 1;

		/* name the function that actually failed */
		mbox_set_syscall_error(mbox, "file_dotlock_create()");
		return -1;
	}
	if (ret == 0) {
		mail_storage_set_error(&mbox->storage->storage,
			MAIL_ERROR_TEMP, MAIL_ERRSTR_LOCK_TIMEOUT);
		return 0;
	}
	mbox->mbox_dotlocked = TRUE;

	/* the mbox file may have been replaced while we were waiting for
	   the dotlock */
	if (mbox_file_open_latest(ctx, lock_type) < 0)
		return -1;
	return 1;
}

/* "dotlock" lock method: all dotlock creation failures are errors.
   max_wait_time is unused, mbox_lock_dotlock_int() uses lock_timeout for
   the dotlock timeout. */
static int mbox_lock_dotlock(struct mbox_lock_context *ctx, int lock_type,
			     time_t max_wait_time ATTR_UNUSED)
{
	return mbox_lock_dotlock_int(ctx, lock_type, FALSE);
}

/* "dotlock_try" lock method: like "dotlock", but mbox_lock_dotlock_int()
   is called with try=TRUE, so some dotlock creation failures are ignored.
   max_wait_time is unused. */
static int mbox_lock_dotlock_try(struct mbox_lock_context *ctx, int lock_type,
				 time_t max_wait_time ATTR_UNUSED)
{
	return mbox_lock_dotlock_int(ctx, lock_type, TRUE);
}

#ifdef HAVE_FLOCK
/* "flock" lock method. F_WRLCK, F_RDLCK and anything else in lock_type are
   mapped to LOCK_EX, LOCK_SH and LOCK_UN. The lock is polled with LOCK_NB,
   sleeping LOCK_RANDOM_USLEEP_TIME between the attempts, until time()
   reaches max_wait_time. max_wait_time=0 means that the lock is tried only
   once.

   Returns 1 if ok, 0 if the lock wasn't acquired before max_wait_time,
   -1 if opening the mbox file or flock() failed. */
static int mbox_lock_flock(struct mbox_lock_context *ctx, int lock_type,
			   time_t max_wait_time)
{
	time_t now, last_notify;

	if (mbox_file_open_latest(ctx, lock_type) < 0)
		return -1;

	/* file isn't open, so there's nothing to unlock */
	if (lock_type == F_UNLCK && ctx->mbox->mbox_fd == -1)
		return 1;

	if (lock_type == F_WRLCK)
		lock_type = LOCK_EX;
	else if (lock_type == F_RDLCK)
		lock_type = LOCK_SH;
	else
		lock_type = LOCK_UN;

	last_notify = 0;
	while (flock(ctx->mbox->mbox_fd, lock_type | LOCK_NB) < 0) {
		if (errno != EWOULDBLOCK) {
			mbox_set_syscall_error(ctx->mbox, "flock()");
			return -1;
		}

		now = time(NULL);
		if (now >= max_wait_time)
			return 0;

		if (now != last_notify) {
			/* notify only once per second, even though we're
			   retrying several times in a second */
			index_storage_lock_notify(&ctx->mbox->ibox,
				MAILBOX_LOCK_NOTIFY_MAILBOX_ABORT,
				max_wait_time - now);
			last_notify = now;
		}

		usleep(LOCK_RANDOM_USLEEP_TIME);
	}

	return 1;
}
#endif

#ifdef HAVE_LOCKF
/* "lockf" lock method. Both F_RDLCK and F_WRLCK are mapped to F_TLOCK,
   F_UNLCK to F_ULOCK. The lock is polled, sleeping LOCK_RANDOM_USLEEP_TIME
   between the attempts, until time() reaches max_wait_time. max_wait_time=0
   means that the lock is tried only once.

   Returns 1 if ok, 0 if the lock wasn't acquired before max_wait_time,
   -1 if opening the mbox file or lockf() failed. */
static int mbox_lock_lockf(struct mbox_lock_context *ctx, int lock_type,
			   time_t max_wait_time)
{
	time_t now, last_notify;

	if (mbox_file_open_latest(ctx, lock_type) < 0)
		return -1;

	/* file isn't open, so there's nothing to unlock */
	if (lock_type == F_UNLCK && ctx->mbox->mbox_fd == -1)
		return 1;

	if (lock_type != F_UNLCK)
		lock_type = F_TLOCK;
	else
		lock_type = F_ULOCK;

	last_notify = 0;
	while (lockf(ctx->mbox->mbox_fd, lock_type, 0) < 0) {
		/* POSIX allows F_TLOCK to fail with either EAGAIN or EACCES
		   when the file is already locked by another process */
		if (errno != EAGAIN && errno != EACCES) {
			mbox_set_syscall_error(ctx->mbox, "lockf()");
			return -1;
		}

		now = time(NULL);
		if (now >= max_wait_time)
			return 0;

		if (now != last_notify) {
			/* notify only once per second, even though we're
			   retrying several times in a second */
			index_storage_lock_notify(&ctx->mbox->ibox,
				MAILBOX_LOCK_NOTIFY_MAILBOX_ABORT,
				max_wait_time - now);
			last_notify = now;
		}

		usleep(LOCK_RANDOM_USLEEP_TIME);
	}

	return 1;
}
#endif

/* "fcntl" lock method: lock or unlock the whole mbox file with fcntl().
   If max_wait_time is 0, the lock is only tried with F_SETLK. Otherwise
   F_SETLKW is used and it's interrupted with alarm() so that the storage
   can be notified about the wait and the deadline can be checked.

   Returns 1 if ok, 0 if the lock wasn't acquired (non-blocking try failed
   or max_wait_time was reached), -1 if opening the mbox file or fcntl()
   failed. */
static int mbox_lock_fcntl(struct mbox_lock_context *ctx, int lock_type,
			   time_t max_wait_time)
{
	struct flock lock;
	time_t cur_time;
	unsigned int secs;
	int cmd;

	if (mbox_file_open_latest(ctx, lock_type) < 0)
		return -1;

	/* file isn't open, so there's nothing to unlock */
	if (lock_type == F_UNLCK && ctx->mbox->mbox_fd == -1)
		return 1;

	/* l_start=0 with l_len=0 covers the whole file */
	memset(&lock, 0, sizeof(lock));
	lock.l_type = lock_type;
	lock.l_whence = SEEK_SET;
	lock.l_start = 0;
	lock.l_len = 0;

	if (max_wait_time != 0) {
		cmd = F_SETLKW;
		cur_time = time(NULL);
		if (cur_time < max_wait_time)
			alarm(I_MIN(max_wait_time - cur_time, 5));
		else
			alarm(1);
	} else {
		/* usually we're waiting here, but if we came from
		   mbox_lock_dotlock(), we just want to try locking */
		cmd = F_SETLK;
	}

	for (;;) {
		if (fcntl(ctx->mbox->mbox_fd, cmd, &lock) >= 0)
			break;

		if (errno != EINTR) {
			if (cmd == F_SETLK &&
			    (errno == EACCES || errno == EAGAIN)) {
				/* non-blocking lock trying failed */
				return 0;
			}
			mbox_set_syscall_error(ctx->mbox, "fcntl()");
			alarm(0);
			return -1;
		}

		/* interrupted by a signal. see if we've waited enough. */
		cur_time = time(NULL);
		if (cur_time >= max_wait_time) {
			alarm(0);
			return 0;
		}

		/* notify locks once every 5 seconds.
		   try to use rounded values. */
		secs = (max_wait_time - cur_time) % 5;
		if (secs == 0)
			secs = 5;
		alarm(secs);

		index_storage_lock_notify(&ctx->mbox->ibox,
					  MAILBOX_LOCK_NOTIFY_MAILBOX_ABORT,
					  max_wait_time - cur_time);
	}

	alarm(0);
	ctx->fcntl_locked = TRUE;
	return 1;
}

static int mbox_lock_list(struct mbox_lock_context *ctx, int lock_type,
			  time_t max_wait_time, int idx)
{
	enum mbox_lock_type *lock_types;
        enum mbox_lock_type type;
	int i, ret = 0, lock_status;

	ctx->lock_type = lock_type;

	lock_types = lock_type == F_WRLCK ||
		(lock_type == F_UNLCK && ctx->mbox->mbox_lock_type == F_WRLCK) ?
		write_locks : read_locks;
	for (i = idx; lock_types[i] != (enum mbox_lock_type)-1; i++) {
		type = lock_types[i];
		lock_status = lock_type != F_UNLCK;

		if (ctx->lock_status[type] == lock_status)
			continue;
		ctx->lock_status[type] = lock_status;

		ret = lock_data[type].func(ctx, lock_type, max_wait_time);
		if (ret <= 0)
			break;
	}
	return ret;
}

/* Change the mbox file locking to lock_type by going through the configured
   lock methods. Used for unlocked -> locked and, when the current lock type
   is F_WRLCK, for dropping to a shared lock.

   *fcntl_locked_r is set to TRUE if fcntl locking was used on success,
   FALSE in all other cases.

   Returns 1 if ok, 0 if locking timed out (the storage error is set),
   -1 if error. */
static int mbox_update_locking(struct mbox_mailbox *mbox, int lock_type,
			       bool *fcntl_locked_r)
{
	struct mbox_lock_context ctx;
	time_t max_wait_time;
	int ret, i;
	bool drop_locks;

	*fcntl_locked_r = FALSE;

        index_storage_lock_notify_reset(&mbox->ibox);

	if (!lock_settings_initialized)
                mbox_init_lock_settings();

	if (mbox->mbox_fd == -1 && mbox->mbox_file_stream != NULL) {
		/* read-only mbox stream. no need to lock. */
		i_assert(mbox->mbox_readonly);
		mbox->mbox_lock_type = lock_type;
		return 1;
	}

	/* absolute deadline given to the lock method handlers */
	max_wait_time = time(NULL) + lock_timeout;

	memset(&ctx, 0, sizeof(ctx));
	ctx.mbox = mbox;

	if (mbox->mbox_lock_type == F_WRLCK) {
		/* dropping to shared lock. first drop those that we
		   don't remove completely. */
		/* everything is marked as locked except the read list
		   methods, so mbox_lock_list() below calls the handlers of
		   the read list methods only */
		for (i = 0; i < MBOX_LOCK_COUNT; i++)
			ctx.lock_status[i] = 1;
		for (i = 0; read_locks[i] != (enum mbox_lock_type)-1; i++)
			ctx.lock_status[read_locks[i]] = 0;
		drop_locks = TRUE;
	} else {
		drop_locks = FALSE;
	}

	mbox->mbox_lock_type = lock_type;
	ret = mbox_lock_list(&ctx, lock_type, max_wait_time, 0);
	if (ret <= 0) {
		/* when we were only dropping to shared lock, the existing
		   locks aren't released on failure */
		if (!drop_locks)
			(void)mbox_unlock_files(&ctx);
		if (ret == 0) {
			mail_storage_set_error(&mbox->storage->storage,
				MAIL_ERROR_TEMP, MAIL_ERRSTR_LOCK_TIMEOUT);
		}
		return ret;
	}

	if (drop_locks) {
		/* dropping to shared lock: drop the locks that are only
		   in write list */
		memset(ctx.lock_status, 0, sizeof(ctx.lock_status));
		for (i = 0; write_locks[i] != (enum mbox_lock_type)-1; i++)
			ctx.lock_status[write_locks[i]] = 1;
		for (i = 0; read_locks[i] != (enum mbox_lock_type)-1; i++)
			ctx.lock_status[read_locks[i]] = 0;

		/* mbox_lock_list() uses the write list for F_UNLCK only
		   while mbox_lock_type is F_WRLCK, so set it temporarily */
		mbox->mbox_lock_type = F_WRLCK;
		(void)mbox_lock_list(&ctx, F_UNLCK, 0, 0);
		mbox->mbox_lock_type = F_RDLCK;
	}

	*fcntl_locked_r = ctx.fcntl_locked;
	return 1;
}

/* Lock the mbox with a shared (F_RDLCK) or an exclusive (F_WRLCK) lock.
   The file locks are taken only if the mbox is currently unlocked,
   otherwise just the lock counter is increased. *lock_id_r is set to the
   id that must be given to mbox_unlock(); the lowest bit is set for
   exclusive locks.

   Returns 1 if ok, 0 if locking timed out, -1 if error. */
int mbox_lock(struct mbox_mailbox *mbox, int lock_type,
	      unsigned int *lock_id_r)
{
	bool fcntl_locked, nfs_flush;
	int ret;

	/* allow only unlock -> shared/exclusive or exclusive -> shared */
	i_assert(lock_type == F_RDLCK || lock_type == F_WRLCK);
	i_assert(lock_type == F_RDLCK || mbox->mbox_lock_type != F_RDLCK);

	/* mbox must be locked before index */
	i_assert(mbox->ibox.index->lock_type != F_WRLCK);

	if (mbox->mbox_lock_type == F_UNLCK) {
		ret = mbox_update_locking(mbox, lock_type, &fcntl_locked);
		if (ret <= 0)
			return ret;

		nfs_flush = (mbox->storage->storage.flags &
			     MAIL_STORAGE_FLAG_NFS_FLUSH_STORAGE) != 0;
		if (nfs_flush && fcntl_locked) {
			nfs_flush_attr_cache_fd_locked(mbox->path,
						       mbox->mbox_fd);
			nfs_flush_read_cache_locked(mbox->path,
						    mbox->mbox_fd);
		} else if (nfs_flush) {
			nfs_flush_attr_cache_unlocked(mbox->path);
			nfs_flush_read_cache_unlocked(mbox->path,
						      mbox->mbox_fd);
		}

		/* new set of file locks, new lock id */
		mbox->mbox_lock_id += 2;
	}

	if (lock_type != F_RDLCK) {
		mbox->mbox_excl_locks++;
		*lock_id_r = mbox->mbox_lock_id + 1;
		return 1;
	}

	mbox->mbox_shared_locks++;
	*lock_id_r = mbox->mbox_lock_id;
	return 1;
}

static int mbox_unlock_files(struct mbox_lock_context *ctx)
{
	int ret = 0;

	if (mbox_lock_list(ctx, F_UNLCK, 0, 0) < 0)
		ret = -1;

	ctx->mbox->mbox_lock_id += 2;
	ctx->mbox->mbox_lock_type = F_UNLCK;
	return ret;
}

/* Release a lock returned by mbox_lock(). The lowest bit of lock_id tells
   if it's an exclusive (1) or a shared (0) lock. The file locks are
   released after the last lock is gone. If the last exclusive lock is
   released while there are still shared locks, the file locks are dropped
   to shared.

   Returns 0 if ok, -1 if error. */
int mbox_unlock(struct mbox_mailbox *mbox, unsigned int lock_id)
{
	struct mbox_lock_context ctx;
	bool fcntl_locked;
	int i;

	i_assert(mbox->mbox_lock_id == (lock_id & ~1));

	if ((lock_id & 1) == 0) {
		/* dropping shared lock */
		i_assert(mbox->mbox_shared_locks > 0);
		mbox->mbox_shared_locks--;
		if (mbox->mbox_shared_locks > 0 || mbox->mbox_excl_locks > 0)
			return 0;
	} else {
		/* dropping exclusive lock */
		i_assert(mbox->mbox_excl_locks > 0);
		mbox->mbox_excl_locks--;
		if (mbox->mbox_excl_locks > 0)
			return 0;
		if (mbox->mbox_shared_locks > 0) {
			/* drop to shared lock */
			return mbox_update_locking(mbox, F_RDLCK,
						   &fcntl_locked) < 0 ? -1 : 0;
		}
	}
	/* all locks gone */

	memset(&ctx, 0, sizeof(ctx));
	ctx.mbox = mbox;

	/* treat every lock method as locked, so that the unlock handlers
	   of all the listed methods get called */
	i = MBOX_LOCK_COUNT;
	while (i > 0)
		ctx.lock_status[--i] = 1;

	return mbox_unlock_files(&ctx);
}