view src/lib-index/mail-hash.c @ 221:ed0d5b17c7a4 HEAD

Added extra functions for easier printing of error messages. Moved file_set_size() to generic function in lib. If there's no space to build hash file, it builds itself in anon-mmaped memory and stays there.
author Timo Sirainen <tss@iki.fi>
date Fri, 13 Sep 2002 03:01:23 +0300
parents c095607a1a28
children cf4d065f2f85
line wrap: on
line source

/* Copyright (C) 2002 Timo Sirainen */

#include "lib.h"
#include "ioloop.h"
#include "file-set-size.h"
#include "primes.h"
#include "mmap-util.h"
#include "write-full.h"
#include "mail-index.h"
#include "mail-index-util.h"
#include "mail-hash.h"

#include <stdio.h>
#include <fcntl.h>

/* Minimum record count for a hash file. By default, the hash file size is
   the number of messages * 3, and it's rebuilt after the file is 3/4 full.
   Use only primes as hash file sizes. */
#define MIN_HASH_SIZE 109

/* Maximum record count for a hash file. */
#define MAX_HASH_SIZE \
	((INT_MAX - sizeof(MailHashHeader)) / 100)

/* When rebuilding hash, make it 30% full */
#define MIN_PERCENTAGE 30

/* Try rebuilding hash sometimes soon after it's 60% full */
#define REBUILD_PERCENTAGE 60

/* Force a rebuild when hash is 80% full */
#define FORCED_REBUILD_PERCENTAGE 80

/* our hashing function is simple - UID*2. The *2 is there because UIDs are
   normally contiguous, so once the UIDs wrap around, we don't want to go
   through lots of records just to find an empty spot */
#define HASH_FUNC(uid) (uid * 2)

#define HASH_FILE_SIZE(records) \
	(sizeof(MailHashHeader) + (records) * sizeof(MailHashRecord))

struct _MailHash {
	MailIndex *index;

	unsigned int sync_id;
	unsigned int size;

	int fd;
	char *filepath;

	void *mmap_base;
	size_t mmap_length;

	MailHashHeader *header;
	unsigned int anon_mmap:1;
	unsigned int dirty_mmap:1;
	unsigned int modified:1;
};

static int mmap_update_real(MailHash *hash)
{
	i_assert(!hash->anon_mmap);

	if (hash->mmap_base != NULL)
		(void)munmap(hash->mmap_base, hash->mmap_length);

	hash->mmap_base = mmap_rw_file(hash->fd, &hash->mmap_length);
	if (hash->mmap_base == MAP_FAILED) {
		hash->mmap_base = NULL;
		hash->header = NULL;
		index_set_error(hash->index,
				"hash: mmap() failed with file %s: %m",
				hash->filepath);
		return FALSE;
	}

	return TRUE;
}

static int hash_verify_header(MailHash *hash)
{
	if (hash->mmap_length <= sizeof(MailHashHeader) ||
	    (hash->mmap_length - sizeof(MailHashHeader)) %
	    sizeof(MailHashRecord) != 0) {
		/* hash must be corrupted, rebuilding should have noticed
		   if it was only partially written. */
		hash->header = NULL;
		index_set_error(hash->index, "Corrupted hash file %s: "
				"Invalid file size %"PRIuSIZE_T"",
				hash->filepath, hash->mmap_length);
		return FALSE;
	}

	hash->header = hash->mmap_base;
	hash->sync_id = hash->header->sync_id;
	hash->size = (hash->mmap_length - sizeof(MailHashHeader)) /
		sizeof(MailHashRecord);

	if (hash->size < MIN_HASH_SIZE || hash->size > MAX_HASH_SIZE) {
		/* invalid size, probably corrupted. */
		hash->header = NULL;
		index_set_error(hash->index, "Corrupted hash file %s: "
				"Invalid size %u", hash->filepath, hash->size);
		return FALSE;
	}

	hash->dirty_mmap = FALSE;
	return TRUE;
}

static int mmap_update(MailHash *hash)
{
	if (hash->fd == -1)
		return FALSE;

	if (!hash->dirty_mmap) {
		/* see if someone else modified it */
		if (hash->header->sync_id == hash->sync_id)
			return TRUE;
	}

	return mmap_update_real(hash) && hash_verify_header(hash);

}

static MailHash *mail_hash_new(MailIndex *index)
{
	MailHash *hash;

	hash = i_new(MailHash, 1);
	hash->fd = -1;
	hash->index = index;
	hash->filepath = i_strconcat(index->filepath, ".hash", NULL);
	hash->dirty_mmap = TRUE;

	index->hash = hash;
	return hash;
}

int mail_hash_create(MailIndex *index)
{
	i_assert(index->lock_type == MAIL_LOCK_EXCLUSIVE);

	return mail_hash_rebuild(mail_hash_new(index));
}

int mail_hash_open_or_create(MailIndex *index)
{
	MailHash *hash;
	int failed;

	hash = mail_hash_new(index);

	hash->fd = open(hash->filepath, O_RDWR);
	if (hash->fd == -1)
		return mail_hash_rebuild(hash);

	if (!mmap_update_real(hash)) {
		/* mmap() failure is fatal */
		mail_hash_free(hash);
		return FALSE;
	}

	/* make sure the header looks fine */
	failed = TRUE;
	if (hash_verify_header(hash)) {
		if (hash->header->indexid == hash->index->indexid)
			failed = FALSE;
		else {
			index_set_error(hash->index,
					"IndexID mismatch for hash file %s",
					hash->filepath);
		}
	}

	if (failed) {
		/* recreate it */
		(void)munmap(hash->mmap_base, hash->mmap_length);
		hash->mmap_base = NULL;
		hash->dirty_mmap = TRUE;

		return mail_hash_rebuild(hash);
	}

	return TRUE;
}

void mail_hash_free(MailHash *hash)
{
	hash->index->hash = NULL;

	if (hash->mmap_base != NULL) {
		(void)munmap(hash->mmap_base, hash->mmap_length);
		hash->mmap_base = NULL;
	}

	if (hash->fd != -1)
		(void)close(hash->fd);
	i_free(hash->filepath);
	i_free(hash);
}

int mail_hash_sync_file(MailHash *hash)
{
	if (!hash->modified)
		return TRUE;
	hash->modified = FALSE;

	if (hash->anon_mmap ||
	    msync(hash->mmap_base, hash->mmap_length, MS_SYNC) == 0)
		return TRUE;
	else {
		index_set_error(hash->index, "msync() failed for %s: %m",
				hash->filepath);
		return FALSE;
	}
}

static void hash_build(MailIndex *index, void *mmap_base,
		       unsigned int hash_size)
{
	MailHashHeader *hdr;
        MailHashRecord *rec;
	MailIndexHeader *idx_hdr;
	MailIndexRecord *idx_rec;
	unsigned int i, count;

	/* we have empty hash file mmap()ed now. fill it by reading the
	   messages from index. */
	rec = (MailHashRecord *) ((char *) mmap_base + sizeof(MailHashHeader));
        idx_rec = index->lookup(index, 1);
	for (count = 0; idx_rec != NULL; count++) {
		i = HASH_FUNC(idx_rec->uid) % hash_size;
		rec[i].uid = idx_rec->uid;
		rec[i].position = INDEX_FILE_POSITION(index, idx_rec);
		idx_rec = index->next(index, idx_rec);
	}

	idx_hdr = index->get_header(index);
	if (count != idx_hdr->messages_count) {
		/* mark this as an error but don't fail because of it. */
		index_set_corrupted(index, "Missing messages while rebuilding "
				    "hash file - %u found, header says %u",
				    count, idx_hdr->messages_count);
	}

	/* setup header */
	hdr = mmap_base;
	hdr->indexid = index->indexid;
	hdr->sync_id = ioloop_time;
	hdr->used_records = count;
}

static int hash_rebuild_to_file(MailIndex *index, int fd, const char *path,
				unsigned int hash_size)
{
	void *mmap_base;
	size_t mmap_length, new_size;

	i_assert(hash_size < MAX_HASH_SIZE);

	new_size = HASH_FILE_SIZE(hash_size);

	/* fill the file with zeros */
	if (file_set_size(fd, (off_t)new_size) < 0) {
		index_set_error(index, "Failed to fill temp hash to "
				"size %"PRIuSIZE_T": %m", new_size);
		return FALSE;
	}

	/* now, mmap() it */
	mmap_base = mmap_rw_file(fd, &mmap_length);
	if (mmap_base == MAP_FAILED) {
		index_set_error(index,
				"mmap()ing temp hash failed: %m");
		return FALSE;
	}
	i_assert(mmap_length == new_size);

	hash_build(index, mmap_base, hash_size);

	if (msync(mmap_base, mmap_length, MS_SYNC) < 0) {
		index_set_error(index, "msync() failed for temp hash %s: %m",
				path);
		(void)munmap(mmap_base, mmap_length);
		return FALSE;
	}

	(void)munmap(mmap_base, mmap_length);

	/* we don't want to leave partially written hash around */
	if (fsync(fd) < 0) {
		index_set_error(index, "fsync() failed with temp hash %s: %m",
				path);
		return FALSE;
	}

	return TRUE;
}

int mail_hash_rebuild(MailHash *hash)
{
	MailIndexHeader *index_header;
	const char *path;
	unsigned int hash_size;
	int fd, failed;

	if (!hash->index->set_lock(hash->index, MAIL_LOCK_EXCLUSIVE))
		return FALSE;

	/* figure out size for our hash */
	index_header = hash->index->get_header(hash->index);
	hash_size = primes_closest(index_header->messages_count * 100 /
				   MIN_PERCENTAGE);
	if (hash_size < MIN_HASH_SIZE)
		hash_size = MIN_HASH_SIZE;

	if (hash_size < index_header->messages_count ||
	    hash_size > MAX_HASH_SIZE) {
		/* either our calculation overflowed, or we reached the
		   max. value primes_closest() gave us. and there's more
		   mails - very unlikely. */
		index_set_corrupted(hash->index, "Too many mails in mailbox "
				    "(%u)", index_header->messages_count);
		return FALSE;
	}

	/* build the hash in a temp file, renaming it to the real hash
	   once finished */
	fd = mail_index_create_temp_file(hash->index, &path);
	if (fd != -1) {
		failed = !hash_rebuild_to_file(hash->index, fd,
					       path, hash_size);

		if (!failed && rename(path, hash->filepath) < 0) {
			index_set_error(hash->index, "rename(%s, %s) failed: %m",
					path, hash->filepath);
			failed = TRUE;
		}

		if (failed) {
			int old_errno = errno;

			(void)close(fd);
			(void)unlink(path);
			fd = -1;

			errno = old_errno;
		}
	}

	if (fd == -1) {
		/* building hash to file failed. if it was because there
		   was no space in disk, we could just as well keep it in
		   memory */
		if (errno != ENOSPC)
			return FALSE;

		if (hash->mmap_base != NULL)
			(void)munmap(hash->mmap_base, hash->mmap_length);

		hash->mmap_length = HASH_FILE_SIZE(hash_size);
		hash->mmap_base = mmap_anonymous(hash->mmap_length);
		hash_build(hash->index, hash->mmap_base, hash_size);

		/* make sure it doesn't exist anymore */
		(void)unlink(hash->filepath);
	}

	/* switch fds */
	if (hash->fd != -1)
		(void)close(hash->fd);
	hash->fd = fd;
	hash->anon_mmap = fd == -1;
	hash->dirty_mmap = !hash->anon_mmap;
	return TRUE;
}

uoff_t mail_hash_lookup_uid(MailHash *hash, unsigned int uid)
{
        MailHashRecord *rec;
	unsigned int hashidx, idx;

	i_assert(uid > 0);
	i_assert(hash->index->lock_type != MAIL_LOCK_UNLOCK);

	if (!mmap_update(hash))
		return 0;

	hashidx = HASH_FUNC(uid) % hash->size;
	rec = (MailHashRecord *) ((char *) hash->mmap_base +
				  sizeof(MailHashHeader));

	/* check from hash index to end of file */
	for (idx = hashidx; idx < hash->size; idx++) {
		if (rec[idx].uid == uid)
			return rec[idx].position;

		if (rec[idx].uid == 0) {
			/* empty hash record - not found. */
			return 0;
		}
	}

	/* check from beginning of file to hash index */
	for (idx = 0; idx < hashidx; idx++) {
		if (rec[idx].uid == uid)
			return rec[idx].position;

		if (rec[idx].uid == 0) {
			/* empty hash record - not found. */
			return 0;
		}
	}

	/* checked through the whole hash file. this really shouldn't happen,
	   we should have rebuilt it long time ago.. */
	return 0;
}

static MailHashRecord *hash_find_uid_or_free(MailHash *hash, unsigned int uid)
{
        MailHashRecord *rec;
	unsigned int hashidx, idx;

	hashidx = HASH_FUNC(uid) % hash->size;
	rec = (MailHashRecord *) ((char *) hash->mmap_base +
				  sizeof(MailHashHeader));

	/* check from hash index to end of file */
	for (idx = hashidx; idx < hash->size; idx++) {
		if (rec[idx].uid == 0 || rec[idx].uid == uid)
			return rec+idx;
	}

	/* check from beginning of file to hash index */
	for (idx = 0; idx < hashidx; idx++) {
		if (rec[idx].uid == 0 || rec[idx].uid == uid)
			return rec+idx;
	}

	/* hash file is full */
	return NULL;
}

void mail_hash_update(MailHash *hash, unsigned int uid, uoff_t pos)
{
	MailHashRecord *rec;
	unsigned int max_used;

	i_assert(uid > 0);
	i_assert(hash->index->lock_type == MAIL_LOCK_EXCLUSIVE);

	if (!mmap_update(hash))
		return;

	if (hash->header->used_records >
	    hash->size * FORCED_REBUILD_PERCENTAGE / 100) {
		/* we really need a rebuild. */
		mail_hash_rebuild(hash);
	}

	/* place the hash into first free record after wanted position */
	rec = hash_find_uid_or_free(hash, uid);

	if (rec == NULL) {
		/* this should never happen, we had already checked that
		   at least 1/5 of hash was empty. except, if the header
		   contained invalid record count for some reason. rebuild.. */
		i_error("Hash file was 100%% full, rebuilding");
		mail_hash_rebuild(hash);

		rec = hash_find_uid_or_free(hash, uid);
		i_assert(rec != NULL);
	}

	if (pos != 0) {
		/* insert/update record */
		if (rec->uid == 0) {
			/* update records count, and see if hash is
			   getting full */
			max_used = hash->size / 100 * REBUILD_PERCENTAGE;
			if (++hash->header->used_records > max_used) {
				hash->index->set_flags |=
					MAIL_INDEX_FLAG_REBUILD_HASH;
			}
		}
		rec->uid = uid;
		rec->position = pos;
	} else {
		/* delete record */
		rec->uid = 0;
		rec->position = 0;
                hash->header->used_records--;
	}

	hash->modified = FALSE;
}