view src/objstore/include/nomad/objstore_backend.h @ 1246:5427f4e9dcb6

objstore: implement a attach/detach objver to txn helpers This rewrites the previous logic that tried to keep track of minimum truncation size. Signed-off-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
author Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
date Fri, 16 Dec 2022 19:46:24 -0500
parents c8ed13a24e47
children 40d18d06086a
line wrap: on
line source

/*
 * Copyright (c) 2015-2020,2022 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef __NOMAD_OBJSTORE_BACKEND_H
#define __NOMAD_OBJSTORE_BACKEND_H

#include <jeffpc/rbtree.h>

#include <nomad/objstore.h>

/*
 * Page geometry.
 *
 * PAGE_SIZE is a power of two, so PAGE_OFFSET_MASK (PAGE_SIZE - 1, i.e.,
 * 0xfff) covers exactly the low-order bits: ANDing a value with it yields
 * that value modulo PAGE_SIZE.  Because of the "ul" suffix both constants
 * have type unsigned long.
 */
#define PAGE_SIZE		4096ul
#define PAGE_OFFSET_MASK	(PAGE_SIZE - 1)

/*
 * Initialization state of an object (see the ->state member of struct obj).
 *
 * The numeric values are spelled out; OBJ_STATE_NEW must remain zero.
 */
enum obj_state {
	OBJ_STATE_NEW  = 0,	/* freshly allocated */
	OBJ_STATE_LIVE = 1,	/* initialization completed */
	OBJ_STATE_DEAD = 2,	/* initialization did not succeed */
};

/*
 * Generic (backend independent) state of one clone.
 *
 * Backend specific behavior is reached through ->ops (see struct
 * clone_ops).
 */
struct objstore_clone {
	/* tree linkage; presumably into a per-vdev tree of clones - confirm */
	struct rb_node node_vdev;

	/* NOTE(review): presumably the volume this clone is part of */
	struct objstore *vol;
	/* NOTE(review): presumably the vdev this clone was added to */
	struct objstore_vdev *vdev;
	/* backend supplied clone operations */
	const struct clone_ops *ops;
	struct xuuid uuid;

	/*
	 * Opaque pointer for backend use.
	 *
	 * NOTE(review): presumably sized according to the
	 * clone_private_size member of struct objstore_vdev_def - confirm.
	 */
	void *private;

	/* the following are protected by the volume lock */
	/* NOTE(review): presumably holds this clone's struct objs - confirm */
	struct rb_tree objs;
};

/*
 * Generic in-memory representation of one object, keyed by its oid.
 *
 * The initobj clone op is documented as being responsible for setting
 * ->nversions and ->ops and for adding the head versions; it typically
 * also sets ->private.
 */
struct obj {
	/* key */
	struct noid oid;

	/* value */
	struct rb_tree versions;/* cached versions */
	struct rb_tree heads;	/* head versions */
	uint64_t nversions;	/* number of versions */
	/* opaque pointer for backend use */
	void *private;

	/* misc */
	/* see enum obj_state */
	enum obj_state state;
	refcnt_t refcnt;
	/*
	 * NOTE(review): presumably the "obj lock" that other comments in
	 * this header refer to - confirm.
	 */
	struct lock lock;
	/* tree linkage; presumably into the clone's ->objs tree - confirm */
	struct rb_node node;

	/* constant for the lifetime of the object */
	struct objstore_clone *clone;
	const struct obj_ops *ops;
};

/*
 * One version of an object, keyed by its version clock.
 *
 * Besides the attributes, a version carries the bookkeeping for opens,
 * the state tying it to a transaction, and a tree of cached pages.
 */
struct objver {
	/* key */
	struct nvclock clock;

	/* value */
	/*
	 * Everything in the attrs structure is used for attribute storage
	 * except for:
	 *   - ino: don't touch (reset to 0)
	 */
	struct nattr attrs;
	/* opaque pointer for backend use */
	void *private;
	/*
	 * Keep track of opens - both qualified and unqualified.
	 *
	 * ->open[0 aka. false] = unqualified
	 * ->open[1 aka. true]  = qualified
	 */
	struct objstore_open_obj_info *open[2];

	/* transaction related state */
	struct {
		struct txn *txn;	 /* active txn */
		int refcnt;		 /* number of attaches */
		struct objver *prev_ver; /* version we cow'd from */
		uint64_t min_data_size;	 /* the shortest truncation so far */
	} txn;

	/* misc */
	/* the object this is a version of */
	struct obj *obj;
	/*
	 * NOTE(review): the two nodes below presumably link this version
	 * into the ->versions and ->heads trees of struct obj - confirm.
	 */
	struct rb_node all_node;	/* all versions */
	struct rb_node head_node;	/* head versions */
	struct rb_tree pages;		/* cached pages for this version */
};

/*
 * Bookkeeping for opens of an object.
 *
 * struct objver keeps up to two pointers to this structure in its ->open[]
 * array, indexed by whether the open is qualified (see ->qualified).
 */
struct objstore_open_obj_info {
	/* constant for the lifetime of this struct */
	struct obj *obj;
	bool qualified;

	/* protected by the obj lock */
	struct objver *ver;
	/* NOTE(review): presumably the number of outstanding opens */
	uint32_t open_count;

	struct {
		/* we must cow before allowing writes */
		bool needed;
	} cow;
};

/* forward declaration - the callbacks in struct txn_entry take a txn */
struct txn;

/*
 * A single entry of a transaction (see the ->entries array of struct txn).
 *
 * ->op names the kind of operation.  The members of the anonymous union
 * are named after the operations (cow, create, setattr, write), so
 * presumably ->op selects which one is valid; OP_NOP has no matching
 * member.
 */
struct txn_entry {
	enum txn_op {
		OP_NOP,
		OP_COW,
		OP_CREATE,
		OP_SETATTR,
		OP_WRITE,
	} op;

	/*
	 * NOTE(review): "log entry" presumably refers to the txn_log_entry
	 * clone op - confirm.
	 */
	bool logged; /* log entry called */
	bool failed; /* log entry failed */

	/*
	 * Per-entry callbacks.  NOTE(review): when and in what order the
	 * generic code invokes them is not visible in this header.
	 */
	int (*perform)(struct txn *txn, struct txn_entry *entry);
	void (*cleanup)(struct txn *txn, struct txn_entry *entry);

	union {
		/* copy-on-write of a version on behalf of an open */
		struct {
			struct objstore_open_obj_info *open;
			struct objver *old;
			struct objver *new;
		} cow;
		/*
		 * object creation; the members mirror the arguments of the
		 * createobj clone op
		 */
		struct {
			struct noid oid;
			struct noid parent;
			struct nvclock clock;
			struct nattr attrs;
		} create;
		/* attribute change; keeps a copy of the previous attributes */
		struct {
			struct objver *ver;
			struct nattr old_attrs;
			unsigned changed;
		} setattr;
		/* write of pgcnt pages, presumably starting at page pgno */
		struct {
			struct objver *ver;
			uint64_t pgno;
			size_t pgcnt;
		} write;
	};
};

/* capacity of the fixed-size ->entries array in struct txn */
#define NUM_TXN_ENTRIES	6
/*
 * A transaction: an id, the clone it is associated with, and room for up
 * to NUM_TXN_ENTRIES entries.
 *
 * NOTE(review): there is no count of used entries here; unused slots are
 * presumably marked with OP_NOP - confirm.
 */
struct txn {
	uint64_t id;
	struct objstore_clone *clone;
	struct txn_entry entries[NUM_TXN_ENTRIES];
};

/*
 * Per-object operations supplied by the backend (see the ->ops member of
 * struct obj).
 *
 * NOTE(review): the meaning of the int return values is not visible in
 * this header; presumably 0 on success and an error code otherwise.
 */
struct obj_ops {
	/*
	 * Determine if a specified version of an object exists.
	 *
	 * This is meant as a lightweight check to avoid allocating an
	 * objver only to have to free it on error.
	 *
	 * If not implemented, the normal getattr call is used all the time.
	 */
	int (*checkversion)(struct obj *obj, const struct nvclock *clock);

	/* Get info for this object */
	int (*info)(struct obj *obj, struct obj_info **infos, size_t *ninfos);

	/* open objects must be closed */
	int (*open)(struct objver *ver);
	int (*close)(struct objver *ver);

	/* create a new object version as a copy of existing version */
	int (*cow)(struct objver *old, const struct nvclock *new);

	/* read or change the attributes of a version */
	int (*getattr)(struct objver *ver, struct nattr *attr);
	int (*setattr)(struct objver *ver, struct nattr *attr,
		       const unsigned changed);
	/* transfer the single page pgno of a version from/to pgdata */
	int (*read_page)(struct objver *ver, void *pgdata, uint64_t pgno);
	int (*write_page)(struct objver *ver, const void *pgdata,
			  uint64_t pgno);

	/*
	 * Called just before the generic object is freed.
	 */
	void (*free)(struct obj *obj);
};

/*
 * Per-clone operations supplied by the backend (see the ->ops member of
 * struct objstore_clone).
 *
 * NOTE(review): the meaning of the int return values is not visible in
 * this header; presumably 0 on success and an error code otherwise.
 */
struct clone_ops {
	/* store the oid of the clone's root object in *root */
	int (*getroot)(struct objstore_clone *clone, struct noid *root);
	/*
	 * Initialize an object.
	 *
	 * At a minimum, the backend must:
	 *  - set obj->nversions
	 *  - set obj->ops
	 *  - add head versions to obj->versions
	 *
	 * Typically, the backend also:
	 *  - sets obj->private
	 *
	 * NOTE(review): struct obj also has a separate ->heads tree; confirm
	 * whether head versions have to be added there as well.
	 */
	int (*initobj)(struct obj *obj);
	/*
	 * Allocate a new oid.  This is used when the generic code wants to
	 * allocate a new object (e.g., because of a create(2) call).  This
	 * is a separate function from object creation because the actual
	 * object creation is done at transaction commit and we need the
	 * oid before then (e.g., to construct the correct directory entry).
	 */
	int (*allocoid)(struct objstore_clone *clone, struct noid *new);
	/*
	 * Create a new object.  The new object will have a single version
	 * which will have attributes as specified.  The parent oid is the
	 * oid of the directory containing the new object.
	 */
	int (*createobj)(struct objstore_clone *clone, const struct noid *oid,
			 const struct nvclock *clock, const struct nattr *attrs,
			 const struct noid *parent);

	/*
	 * Transaction hooks.
	 *
	 * NOTE(review): the calling sequence is not visible in this header;
	 * going by the names, presumably txn_begin, then txn_log_entry per
	 * entry, then txn_commit and txn_complete, with txn_abort on
	 * failure - confirm.
	 */
	int (*txn_begin)(struct txn *txn);
	int (*txn_log_entry)(struct txn *txn, struct txn_entry *entry);
	int (*txn_commit)(struct txn *txn);
	int (*txn_complete)(struct txn *txn);
	void (*txn_abort)(struct txn *txn);
};

/*
 * Description of a vdev backend: its name, the sizes of its private data
 * and the entry points used to create or import vdevs and clones.
 *
 * NOTE(review): vdev_private_size and clone_private_size presumably tell
 * the generic code how much backend private memory to allocate per vdev
 * and per clone - confirm.
 */
struct objstore_vdev_def {
	const char *name;
	size_t vdev_private_size;
	size_t clone_private_size;

	int (*create_vdev)(struct objstore_vdev *vdev);
	/*
	 * create_clone has two modes of operation depending on whether or
	 * not it is given a root object host & uniq.
	 *
	 * 1. without (host = 0, uniq = 0):
	 *     creates a new volume, allocating a new root object and
	 *     initializing it to the empty directory
	 * 2. with (host != 0, uniq != 0):
	 *     creates a new volume, without allocating a new root object
	 *     but using the supplied root host/uniq as the root
	 *
	 * The first is used to create a completely new file system.  The
	 * second is used to initialize a clone of an existing file system
	 * (presumably the one the supplied root object belongs to).
	 */
	int (*create_clone)(struct objstore_vdev *vdev,
			    const struct xuuid *volid,
			    uint64_t root_host, uint64_t root_uniq);
	int (*import_vdev)(struct objstore_vdev *vdev);
	int (*import_clone)(struct objstore_clone *clone);
};

/*
 * Judging by the name and signature: add a clone identified by volid to
 * the given vdev and return its struct objstore_clone.
 *
 * NOTE(review): the implementation is not visible here; confirm how
 * failure is reported (NULL vs. an error-encoding pointer).
 */
extern struct objstore_clone *vdev_add_clone(struct objstore_vdev *vdev,
					     const struct xuuid *volid);

/*
 * Judging by the name and signature: add a version with the given clock
 * to obj and return the resulting struct objver.
 *
 * NOTE(review): the implementation is not visible here; confirm the
 * locking requirements and how failure is reported.
 */
extern struct objver *obj_add_version(struct obj *obj,
				      const struct nvclock *clock);

/*
 * Judging by the name and signature: fill buf with the contents of a
 * directory, given the host/uniq pair of the directory itself and the
 * host/uniq pair of its parent.
 *
 * NOTE(review): the implementation is not visible here; the exact
 * contents written to buf and the meaning of the return value should be
 * confirmed.
 */
extern int obj_make_dir_buffer(uint64_t this_host, uint64_t this_uniq,
			       uint64_t parent_host, uint64_t parent_uniq,
			       struct buffer *buf);

#endif