linux/fs/orangefs/orangefs-utils.c

// SPDX-License-Identifier: GPL-2.0
/*
 * (C) 2001 Clemson University and The University of Chicago
 * Copyright 2018 Omnibond Systems, L.L.C.
 *
 * See COPYING in top-level directory.
 */
#include <linux/kernel.h>
#include "protocol.h"
#include "orangefs-kernel.h"
#include "orangefs-dev-proto.h"
#include "orangefs-bufmap.h"

__s32 fsid_of_op(struct orangefs_kernel_op_s *op)
{
	__s32 fsid = ORANGEFS_FS_ID_NULL;

	if (op) {
		switch (op->upcall.type) {
		case ORANGEFS_VFS_OP_FILE_IO:
			fsid = op->upcall.req.io.refn.fs_id;
			break;
		case ORANGEFS_VFS_OP_LOOKUP:
			fsid = op->upcall.req.lookup.parent_refn.fs_id;
			break;
		case ORANGEFS_VFS_OP_CREATE:
			fsid = op->upcall.req.create.parent_refn.fs_id;
			break;
		case ORANGEFS_VFS_OP_GETATTR:
			fsid = op->upcall.req.getattr.refn.fs_id;
			break;
		case ORANGEFS_VFS_OP_REMOVE:
			fsid = op->upcall.req.remove.parent_refn.fs_id;
			break;
		case ORANGEFS_VFS_OP_MKDIR:
			fsid = op->upcall.req.mkdir.parent_refn.fs_id;
			break;
		case ORANGEFS_VFS_OP_READDIR:
			fsid = op->upcall.req.readdir.refn.fs_id;
			break;
		case ORANGEFS_VFS_OP_SETATTR:
			fsid = op->upcall.req.setattr.refn.fs_id;
			break;
		case ORANGEFS_VFS_OP_SYMLINK:
			fsid = op->upcall.req.sym.parent_refn.fs_id;
			break;
		case ORANGEFS_VFS_OP_RENAME:
			fsid = op->upcall.req.rename.old_parent_refn.fs_id;
			break;
		case ORANGEFS_VFS_OP_STATFS:
			fsid = op->upcall.req.statfs.fs_id;
			break;
		case ORANGEFS_VFS_OP_TRUNCATE:
			fsid = op->upcall.req.truncate.refn.fs_id;
			break;
		case ORANGEFS_VFS_OP_RA_FLUSH:
			fsid = op->upcall.req.ra_cache_flush.refn.fs_id;
			break;
		case ORANGEFS_VFS_OP_FS_UMOUNT:
			fsid = op->upcall.req.fs_umount.fs_id;
			break;
		case ORANGEFS_VFS_OP_GETXATTR:
			fsid = op->upcall.req.getxattr.refn.fs_id;
			break;
		case ORANGEFS_VFS_OP_SETXATTR:
			fsid = op->upcall.req.setxattr.refn.fs_id;
			break;
		case ORANGEFS_VFS_OP_LISTXATTR:
			fsid = op->upcall.req.listxattr.refn.fs_id;
			break;
		case ORANGEFS_VFS_OP_REMOVEXATTR:
			fsid = op->upcall.req.removexattr.refn.fs_id;
			break;
		case ORANGEFS_VFS_OP_FSYNC:
			fsid = op->upcall.req.fsync.refn.fs_id;
			break;
		default:
			break;
		}
	}
	return fsid;
}

static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs)
{
	int flags = 0;
	if (attrs->flags & ORANGEFS_IMMUTABLE_FL)
		flags |= S_IMMUTABLE;
	else
		flags &= ~S_IMMUTABLE;
	if (attrs->flags & ORANGEFS_APPEND_FL)
		flags |= S_APPEND;
	else
		flags &= ~S_APPEND;
	if (attrs->flags & ORANGEFS_NOATIME_FL)
		flags |= S_NOATIME;
	else
		flags &= ~S_NOATIME;
	return flags;
}

static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs)
{
	int perm_mode = 0;

	if (attrs->perms & ORANGEFS_O_EXECUTE)
		perm_mode |= S_IXOTH;
	if (attrs->perms & ORANGEFS_O_WRITE)
		perm_mode |= S_IWOTH;
	if (attrs->perms & ORANGEFS_O_READ)
		perm_mode |= S_IROTH;

	if (attrs->perms & ORANGEFS_G_EXECUTE)
		perm_mode |= S_IXGRP;
	if (attrs->perms & ORANGEFS_G_WRITE)
		perm_mode |= S_IWGRP;
	if (attrs->perms & ORANGEFS_G_READ)
		perm_mode |= S_IRGRP;

	if (attrs->perms & ORANGEFS_U_EXECUTE)
		perm_mode |= S_IXUSR;
	if (attrs->perms & ORANGEFS_U_WRITE)
		perm_mode |= S_IWUSR;
	if (attrs->perms & ORANGEFS_U_READ)
		perm_mode |= S_IRUSR;

	if (attrs->perms & ORANGEFS_G_SGID)
		perm_mode |= S_ISGID;
	if (attrs->perms & ORANGEFS_U_SUID)
		perm_mode |= S_ISUID;

	return perm_mode;
}

/*
 * NOTE: in kernel land, we never use the sys_attr->link_target for
 * anything, so don't bother copying it into the sys_attr object here.
 */
static inline void copy_attributes_from_inode(struct inode *inode,
    struct ORANGEFS_sys_attr_s *attrs)
{
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	attrs->mask = 0;
	if (orangefs_inode->attr_valid & ATTR_UID) {
		attrs->owner = from_kuid(&init_user_ns, inode->i_uid);
		attrs->mask |= ORANGEFS_ATTR_SYS_UID;
		gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner);
	}
	if (orangefs_inode->attr_valid & ATTR_GID) {
		attrs->group = from_kgid(&init_user_ns, inode->i_gid);
		attrs->mask |= ORANGEFS_ATTR_SYS_GID;
		gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group);
	}

	if (orangefs_inode->attr_valid & ATTR_ATIME) {
		attrs->mask |= ORANGEFS_ATTR_SYS_ATIME;
		if (orangefs_inode->attr_valid & ATTR_ATIME_SET) {
			attrs->atime = (time64_t) inode_get_atime_sec(inode);
			attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET;
		}
	}
	if (orangefs_inode->attr_valid & ATTR_MTIME) {
		attrs->mask |= ORANGEFS_ATTR_SYS_MTIME;
		if (orangefs_inode->attr_valid & ATTR_MTIME_SET) {
			attrs->mtime = (time64_t) inode_get_mtime_sec(inode);
			attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET;
		}
	}
	if (orangefs_inode->attr_valid & ATTR_CTIME)
		attrs->mask |= ORANGEFS_ATTR_SYS_CTIME;

	/*
	 * ORANGEFS cannot set size with a setattr operation. Probably not
	 * likely to be requested through the VFS, but just in case, don't
	 * worry about ATTR_SIZE
	 */

	if (orangefs_inode->attr_valid & ATTR_MODE) {
		attrs->perms = ORANGEFS_util_translate_mode(inode->i_mode);
		attrs->mask |= ORANGEFS_ATTR_SYS_PERM;
	}
}

static int orangefs_inode_type(enum orangefs_ds_type objtype)
{
	if (objtype == ORANGEFS_TYPE_METAFILE)
		return S_IFREG;
	else if (objtype == ORANGEFS_TYPE_DIRECTORY)
		return S_IFDIR;
	else if (objtype == ORANGEFS_TYPE_SYMLINK)
		return S_IFLNK;
	else
		return -1;
}

static void orangefs_make_bad_inode(struct inode *inode)
{
	if (is_root_handle(inode)) {
		/*
		 * if this occurs, the pvfs2-client-core was killed but we
		 * can't afford to lose the inode operations and such
		 * associated with the root handle in any case.
		 */
		gossip_debug(GOSSIP_UTILS_DEBUG,
			     "*** NOT making bad root inode %pU\n",
			     get_khandle_from_ino(inode));
	} else {
		gossip_debug(GOSSIP_UTILS_DEBUG,
			     "*** making bad inode %pU\n",
			     get_khandle_from_ino(inode));
		make_bad_inode(inode);
	}
}

static int orangefs_inode_is_stale(struct inode *inode,
    struct ORANGEFS_sys_attr_s *attrs, char *link_target)
{
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	int type = orangefs_inode_type(attrs->objtype);
	/*
	 * If the inode type or symlink target have changed then this
	 * inode is stale.
	 */
	if (type == -1 || inode_wrong_type(inode, type)) {
		orangefs_make_bad_inode(inode);
		return 1;
	}
	if (type == S_IFLNK && strncmp(orangefs_inode->link_target,
	    link_target, ORANGEFS_NAME_MAX)) {
		orangefs_make_bad_inode(inode);
		return 1;
	}
	return 0;
}

int orangefs_inode_getattr(struct inode *inode, int flags)
{
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	struct orangefs_kernel_op_s *new_op;
	loff_t inode_size;
	int ret, type;

	gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU flags %d\n",
	    __func__, get_khandle_from_ino(inode), flags);

again:
	spin_lock(&inode->i_lock);
	/* Must have all the attributes in the mask and be within cache time. */
	if ((!flags && time_before(jiffies, orangefs_inode->getattr_time)) ||
	    orangefs_inode->attr_valid || inode->i_state & I_DIRTY_PAGES) {
		if (orangefs_inode->attr_valid) {
			spin_unlock(&inode->i_lock);
			write_inode_now(inode, 1);
			goto again;
		}
		spin_unlock(&inode->i_lock);
		return 0;
	}
	spin_unlock(&inode->i_lock);

	new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
	if (!new_op)
		return -ENOMEM;
	new_op->upcall.req.getattr.refn = orangefs_inode->refn;
	/*
	 * Size is the hardest attribute to get.  The incremental cost of any
	 * other attribute is essentially zero.
	 */
	if (flags)
		new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT;
	else
		new_op->upcall.req.getattr.mask =
		    ORANGEFS_ATTR_SYS_ALL_NOHINT & ~ORANGEFS_ATTR_SYS_SIZE;

	ret = service_operation(new_op, __func__,
	    get_interruptible_flag(inode));
	if (ret != 0)
		goto out;

again2:
	spin_lock(&inode->i_lock);
	/* Must have all the attributes in the mask and be within cache time. */
	if ((!flags && time_before(jiffies, orangefs_inode->getattr_time)) ||
	    orangefs_inode->attr_valid || inode->i_state & I_DIRTY_PAGES) {
		if (orangefs_inode->attr_valid) {
			spin_unlock(&inode->i_lock);
			write_inode_now(inode, 1);
			goto again2;
		}
		if (inode->i_state & I_DIRTY_PAGES) {
			ret = 0;
			goto out_unlock;
		}
		gossip_debug(GOSSIP_UTILS_DEBUG, "%s: in cache or dirty\n",
		    __func__);
		ret = 0;
		goto out_unlock;
	}

	if (!(flags & ORANGEFS_GETATTR_NEW)) {
		ret = orangefs_inode_is_stale(inode,
		    &new_op->downcall.resp.getattr.attributes,
		    new_op->downcall.resp.getattr.link_target);
		if (ret) {
			ret = -ESTALE;
			goto out_unlock;
		}
	}

	type = orangefs_inode_type(new_op->
	    downcall.resp.getattr.attributes.objtype);
	switch (type) {
	case S_IFREG:
		inode->i_flags = orangefs_inode_flags(&new_op->
		    downcall.resp.getattr.attributes);
		if (flags) {
			inode_size = (loff_t)new_op->
			    downcall.resp.getattr.attributes.size;
			inode->i_size = inode_size;
			inode->i_blkbits = ffs(new_op->downcall.resp.getattr.
			    attributes.blksize);
			inode->i_bytes = inode_size;
			inode->i_blocks =
			    (inode_size + 512 - inode_size % 512)/512;
		}
		break;
	case S_IFDIR:
		if (flags) {
			inode->i_size = PAGE_SIZE;
			inode_set_bytes(inode, inode->i_size);
		}
		set_nlink(inode, 1);
		break;
	case S_IFLNK:
		if (flags & ORANGEFS_GETATTR_NEW) {
			inode->i_size = (loff_t)strlen(new_op->
			    downcall.resp.getattr.link_target);
			ret = strscpy(orangefs_inode->link_target,
			    new_op->downcall.resp.getattr.link_target,
			    ORANGEFS_NAME_MAX);
			if (ret == -E2BIG) {
				ret = -EIO;
				goto out_unlock;
			}
			inode->i_link = orangefs_inode->link_target;
		}
		break;
	/* i.e. -1 */
	default:
		/* XXX: ESTALE?  This is what is done if it is not new. */
		orangefs_make_bad_inode(inode);
		ret = -ESTALE;
		goto out_unlock;
	}

	inode->i_uid = make_kuid(&init_user_ns, new_op->
	    downcall.resp.getattr.attributes.owner);
	inode->i_gid = make_kgid(&init_user_ns, new_op->
	    downcall.resp.getattr.attributes.group);
	inode_set_atime(inode,
			(time64_t)new_op->downcall.resp.getattr.attributes.atime,
			0);
	inode_set_mtime(inode,
			(time64_t)new_op->downcall.resp.getattr.attributes.mtime,
			0);
	inode_set_ctime(inode,
			(time64_t)new_op->downcall.resp.getattr.attributes.ctime,
			0);

	/* special case: mark the root inode as sticky */
	inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) |
	    orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes);

	orangefs_inode->getattr_time = jiffies +
	    orangefs_getattr_timeout_msecs*HZ/1000;
	ret = 0;
out_unlock:
	spin_unlock(&inode->i_lock);
out:
	op_release(new_op);
	return ret;
}

int orangefs_inode_check_changed(struct inode *inode)
{
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	struct orangefs_kernel_op_s *new_op;
	int ret;

	gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
	    get_khandle_from_ino(inode));

	new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
	if (!new_op)
		return -ENOMEM;
	new_op->upcall.req.getattr.refn = orangefs_inode->refn;
	new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_TYPE |
	    ORANGEFS_ATTR_SYS_LNK_TARGET;

	ret = service_operation(new_op, __func__,
	    get_interruptible_flag(inode));
	if (ret != 0)
		goto out;

	ret = orangefs_inode_is_stale(inode,
	    &new_op->downcall.resp.getattr.attributes,
	    new_op->downcall.resp.getattr.link_target);
out:
	op_release(new_op);
	return ret;
}

/*
 * issues a orangefs setattr request to make sure the new attribute values
 * take effect if successful.  returns 0 on success; -errno otherwise
 */
int orangefs_inode_setattr(struct inode *inode)
{
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	struct orangefs_kernel_op_s *new_op;
	int ret;

	new_op = op_alloc(ORANGEFS_VFS_OP_SETATTR);
	if (!new_op)
		return -ENOMEM;

	spin_lock(&inode->i_lock);
	new_op->upcall.uid = from_kuid(&init_user_ns, orangefs_inode->attr_uid);
	new_op->upcall.gid = from_kgid(&init_user_ns, orangefs_inode->attr_gid);
	new_op->upcall.req.setattr.refn = orangefs_inode->refn;
	copy_attributes_from_inode(inode,
	    &new_op->upcall.req.setattr.attributes);
	orangefs_inode->attr_valid = 0;
	if (!new_op->upcall.req.setattr.attributes.mask) {
		spin_unlock(&inode->i_lock);
		op_release(new_op);
		return 0;
	}
	spin_unlock(&inode->i_lock);

	ret = service_operation(new_op, __func__,
	    get_interruptible_flag(inode) | ORANGEFS_OP_WRITEBACK);
	gossip_debug(GOSSIP_UTILS_DEBUG,
	    "orangefs_inode_setattr: returning %d\n", ret);
	if (ret)
		orangefs_make_bad_inode(inode);

	op_release(new_op);

	if (ret == 0)
		orangefs_inode->getattr_time = jiffies - 1;
	return ret;
}

/*
 * The following is a very dirty hack that is now a permanent part of the
 * ORANGEFS protocol. See protocol.h for more error definitions.
 */

/* The order matches include/orangefs-types.h in the OrangeFS source. */
static int PINT_errno_mapping[] = {
	0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM,
	EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE,
	EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG,
	ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH,
	EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM,
	EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE,
	ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE,
	EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS,
	ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY,
	EACCES, ECONNRESET, ERANGE
};

int orangefs_normalize_to_errno(__s32 error_code)
{
	__u32 i;

	/* Success */
	if (error_code == 0) {
		return 0;
	/*
	 * This shouldn't ever happen. If it does it should be fixed on the
	 * server.
	 */
	} else if (error_code > 0) {
		gossip_err("orangefs: error status received.\n");
		gossip_err("orangefs: assuming error code is inverted.\n");
		error_code = -error_code;
	}

	/*
	 * XXX: This is very bad since error codes from ORANGEFS may not be
	 * suitable for return into userspace.
	 */

	/*
	 * Convert ORANGEFS error values into errno values suitable for return
	 * from the kernel.
	 */
	if ((-error_code) & ORANGEFS_NON_ERRNO_ERROR_BIT) {
		if (((-error_code) &
		    (ORANGEFS_ERROR_NUMBER_BITS|ORANGEFS_NON_ERRNO_ERROR_BIT|
		    ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL) {
			/*
			 * cancellation error codes generally correspond to
			 * a timeout from the client's perspective
			 */
			error_code = -ETIMEDOUT;
		} else {
			/* assume a default error code */
			gossip_err("%s: bad error code :%d:.\n",
				__func__,
				error_code);
			error_code = -EINVAL;
		}

	/* Convert ORANGEFS encoded errno values into regular errno values. */
	} else if ((-error_code) & ORANGEFS_ERROR_BIT) {
		i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS);
		if (i < ARRAY_SIZE(PINT_errno_mapping))
			error_code = -PINT_errno_mapping[i];
		else
			error_code = -EINVAL;

	/*
	 * Only ORANGEFS protocol error codes should ever come here. Otherwise
	 * there is a bug somewhere.
	 */
	} else {
		gossip_err("%s: unknown error code.\n", __func__);
		error_code = -EINVAL;
	}
	return error_code;
}

#define NUM_MODES 11
__s32 ORANGEFS_util_translate_mode(int mode)
{
	int ret = 0;
	int i = 0;
	static int modes[NUM_MODES] = {
		S_IXOTH, S_IWOTH, S_IROTH,
		S_IXGRP, S_IWGRP, S_IRGRP,
		S_IXUSR, S_IWUSR, S_IRUSR,
		S_ISGID, S_ISUID
	};
	static int orangefs_modes[NUM_MODES] = {
		ORANGEFS_O_EXECUTE, ORANGEFS_O_WRITE, ORANGEFS_O_READ,
		ORANGEFS_G_EXECUTE, ORANGEFS_G_WRITE, ORANGEFS_G_READ,
		ORANGEFS_U_EXECUTE, ORANGEFS_U_WRITE, ORANGEFS_U_READ,
		ORANGEFS_G_SGID, ORANGEFS_U_SUID
	};

	for (i = 0; i < NUM_MODES; i++)
		if (mode & modes[i])
			ret |= orangefs_modes[i];

	return ret;
}
#undef NUM_MODES