// Matrix Construct
//
// Copyright (C) Matrix Construct Developers, Authors & Contributors
// Copyright (C) 2016-2018 Jason Volk <jason@zemos.net>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.

#include <RB_INC_FCNTL_H
#include <RB_INC_SYS_STAT_H
#include <RB_INC_SYS_STATFS_H
#include <RB_INC_SYS_STATVFS_H
#include <RB_INC_SYS_RESOURCE_H
#include <RB_INC_SYS_SYSMACROS_H
#include <boost/filesystem.hpp>

#ifdef IRCD_USE_AIO
	#include "fs_aio.h"
#endif

#ifdef IRCD_USE_IOU
	#include "fs_iou.h"
#endif

// TODO: prevents use until io_uring support implemented
#undef IRCD_USE_IOU

decltype(ircd::fs::log)
ircd::fs::log
{
	"fs"
};

//
// init
//

namespace ircd::fs
{
	extern conf::item<ulong> rlimit_nofile;
	static void update_rlimit_nofile();
	static void init_dump_info();
}

decltype(ircd::fs::rlimit_nofile)
ircd::fs::rlimit_nofile
{
	{
		{ "name",      "ircd.fs.rlimit.nofile"  },
		{ "default",   65535L                   },
		{ "persist",   false                    },
	},
	update_rlimit_nofile
};

//
// init::init
//

ircd::fs::init::init()
{
	init_dump_info();
}

ircd::fs::init::~init()
noexcept
{
}

void
ircd::fs::init_dump_info()
{
	const bool support_async
	{
		false || iou::system || aio::system
	};

	if(support_async)
		log::info
		{
			log, "Asynchronous filesystem IO provided by %s %s.",
			RB_OS,
			iou::system? "io_uring":
			aio::system? "AIO":
			              "?????",
		};
	else
		log::warning
		{
			log, "Support for asynchronous filesystem IO has not been"
			" established. Filesystem IO is degraded to synchronous system calls."
		};
}

#if defined(HAVE_SYS_RESOURCE_H) && defined(RLIMIT_NOFILE)
void
ircd::fs::update_rlimit_nofile()
try
{
	rlimit rlim[2] {0};
	syscall(getrlimit, RLIMIT_NOFILE, &rlim[0]);

	rlim[1] = rlim[0];
	rlim[1].rlim_cur = std::max(rlim[1].rlim_cur, ulong(fs::rlimit_nofile));
	rlim[1].rlim_cur = std::min(rlim[1].rlim_cur, rlim[1].rlim_max);
	if(rlim[0].rlim_cur == rlim[1].rlim_cur)
		return;

	syscall(setrlimit, RLIMIT_NOFILE, &rlim[1]);
	log::info
	{
		log, "Raised resource limit for number of open files from %ld to %ld",
		rlim[0].rlim_cur,
		rlim[1].rlim_cur,
	};
}
catch(const std::system_error &e)
{
	log::warning
	{
		log, "Failed to raise resource limit for number of open files :%s",
		e.what()
	};
}
#else
void
ircd::fs::init_rlimit_nofile()
{
	log::dwarning
	{
		log, "Cannot modify resource limit for number of open files."
	};
}
#endif

///////////////////////////////////////////////////////////////////////////////
//
// fs/support.h
//

decltype(ircd::fs::support::pwritev2)
ircd::fs::support::pwritev2
{
	#if defined(HAVE_PWRITEV2)
		info::kernel_version[0] > 4 ||
		(info::kernel_version[0] >= 4 && info::kernel_version[1] >= 6)
	#else
		false
	#endif
};

decltype(ircd::fs::support::preadv2)
ircd::fs::support::preadv2
{
	#if defined(HAVE_PREADV2)
		info::kernel_version[0] > 4 ||
		(info::kernel_version[0] >= 4 && info::kernel_version[1] >= 6)
	#else
		false
	#endif
};

decltype(ircd::fs::support::sync)
ircd::fs::support::sync
{
	#if defined(HAVE_PWRITEV2) && defined(RWF_SYNC)
		info::kernel_version[0] > 4 ||
		(info::kernel_version[0] >= 4 && info::kernel_version[1] >= 7)
	#else
		false
	#endif
};

decltype(ircd::fs::support::dsync)
ircd::fs::support::dsync
{
	#if defined(HAVE_PWRITEV2) && defined(RWF_DSYNC)
		info::kernel_version[0] > 4 ||
		(info::kernel_version[0] >= 4 && info::kernel_version[1] >= 7)
	#else
		false
	#endif
};

decltype(ircd::fs::support::hipri)
ircd::fs::support::hipri
{
	#if defined(HAVE_PWRITEV2) && defined(RWF_HIPRI)
		info::kernel_version[0] > 4 ||
		(info::kernel_version[0] >= 4 && info::kernel_version[1] >= 6)
	#else
		false
	#endif
};

decltype(ircd::fs::support::nowait)
ircd::fs::support::nowait
{
	#if defined(HAVE_PWRITEV2) && defined(RWF_NOWAIT)
		info::kernel_version[0] > 4 ||
		(info::kernel_version[0] >= 4 && info::kernel_version[1] >= 14)
	#else
		false
	#endif
};

decltype(ircd::fs::support::append)
ircd::fs::support::append
{
	#if defined(HAVE_PWRITEV2) && defined(RWF_APPEND)
		info::kernel_version[0] > 4 ||
		(info::kernel_version[0] >= 4 && info::kernel_version[1] >= 16)
	#else
		false
	#endif
};

decltype(ircd::fs::support::rwh_write_life)
ircd::fs::support::rwh_write_life
{
	#if defined(HAVE_FCNTL_H) && defined(F_SET_FILE_RW_HINT)
		info::kernel_version[0] > 4 ||
		(info::kernel_version[0] >= 4 && info::kernel_version[1] >= 13)
	#else
		false
	#endif
};

decltype(ircd::fs::support::rwf_write_life)
ircd::fs::support::rwf_write_life
{
	#if defined(RWF_WRITE_LIFE_SHIFT)
		false //TODO: XXX
	#else
		false
	#endif
};

decltype(ircd::fs::support::aio)
ircd::fs::support::aio
{
	#ifdef IRCD_USE_AIO
		true
	#else
		false
	#endif
};

void
ircd::fs::support::dump_info()
{
	#if defined(IRCD_USE_AIO) || defined(IRCD_USE_IOU)
		const bool support_async {true};
	#else
		const bool support_async {false};
	#endif

	log::info
	{
		log, "Support: async:%b preadv2:%b pwritev2:%b SYNC:%b DSYNC:%b HIPRI:%b NOWAIT:%b APPEND:%b RWH:%b WLH:%b",
		support_async,
		preadv2,
		pwritev2,
		sync,
		dsync,
		hipri,
		nowait,
		append,
		rwh_write_life,
		rwf_write_life,
	};

	#ifdef RB_DEBUG
	const unique_mutable_buffer buf
	{
		PATH_MAX_LEN + 1
	};

	log::debug
	{
		log, "Current working directory: `%s'", cwd(buf)
	};
	#endif
}

bool
ircd::fs::support::fallocate(const string_view &path,
                             const write_opts &wopts)
try
{
	const fd::opts opts
	{
		std::ios::out
	};

	fs::fd fd
	{
		path, opts
	};

	fs::allocate(fd, info::page_size, wopts);
	return true;
}
catch(const std::system_error &e)
{
	const auto &ec(e.code());
	if(system_category(ec)) switch(ec.value())
	{
		case int(std::errc::invalid_argument):
		case int(std::errc::operation_not_supported):
			return false;

		default:
			break;
	}

	throw;
}

bool
ircd::fs::support::direct_io(const string_view &path)
try
{
	fd::opts opts{std::ios::out};
	opts.direct = true;
	fd{path, opts};
	return true;
}
catch(const std::system_error &e)
{
	const auto &ec(e.code());
	if(system_category(ec)) switch(ec.value())
	{
		case int(std::errc::invalid_argument):
			return false;

		default:
			break;
	}

	throw;
}

#if defined(HAVE_SYS_RESOURCE_H) && defined(RLIMIT_FSIZE)
size_t
ircd::fs::support::rlimit_fsize()
{
	rlimit rlim;
	syscall(getrlimit, RLIMIT_FSIZE, &rlim);
	return rlim.rlim_cur;
}
#else
size_t
ircd::fs::support::rlimit_fize()
{
	return -1;
}
#endif

#if defined(HAVE_SYS_RESOURCE_H) && defined(RLIMIT_NOFILE)
size_t
ircd::fs::support::rlimit_nofile()
{
	rlimit rlim;
	syscall(getrlimit, RLIMIT_NOFILE, &rlim);
	return rlim.rlim_cur;
}
#else
size_t
ircd::fs::support::rlimit_nofile()
{
	return -1;
}
#endif

///////////////////////////////////////////////////////////////////////////////
//
// fs.h / misc
//

bool
ircd::fs::mkdir(const string_view &path)
try
{
	return filesystem::create_directories(_path(path));
}
catch(const filesystem::filesystem_error &e)
{
	throw error{e};
}

bool
ircd::fs::remove(const string_view &path)
try
{
	return filesystem::remove(_path(path));
}
catch(const filesystem::filesystem_error &e)
{
	throw error{e};
}

bool
ircd::fs::remove(std::nothrow_t,
                 const string_view &path)
{
	boost::system::error_code ec;
	return filesystem::remove(_path(path), ec);
}

bool
ircd::fs::rename(const string_view &old,
                 const string_view &new_)
try
{
	filesystem::rename(_path(old), _path(new_));
	return true;
}
catch(const filesystem::filesystem_error &e)
{
	throw error{e};
}

bool
ircd::fs::rename(std::nothrow_t,
                 const string_view &old,
                 const string_view &new_)
{
	boost::system::error_code ec;
	filesystem::rename(_path(old), _path(new_), ec);
	return !ec;
}

std::vector<std::string>
ircd::fs::ls_r(const string_view &path)
try
{
	const filesystem::recursive_directory_iterator end;
	filesystem::recursive_directory_iterator it
	{
		_path(path)
	};

	std::vector<std::string> ret;
	std::for_each(it, end, [&ret]
	(const auto &ent)
	{
		ret.emplace_back(ent.path().string());
	});

	return ret;
}
catch(const filesystem::filesystem_error &e)
{
	throw error{e};
}

std::vector<std::string>
ircd::fs::ls(const string_view &path)
try
{
	static const filesystem::directory_iterator end;
	filesystem::directory_iterator it
	{
		_path(path)
	};

	std::vector<std::string> ret;
	std::for_each(it, end, [&ret]
	(const auto &ent)
	{
		ret.emplace_back(ent.path().string());
	});

	return ret;
}
catch(const filesystem::filesystem_error &e)
{
	throw error{e};
}

size_t
ircd::fs::size(const string_view &path)
try
{
	return filesystem::file_size(_path(path));
}
catch(const filesystem::filesystem_error &e)
{
	throw error{e};
}

bool
ircd::fs::is_reg(const string_view &path)
try
{
	return filesystem::is_regular_file(_path(path));
}
catch(const filesystem::filesystem_error &e)
{
	throw error{e};
}

bool
ircd::fs::is_dir(const string_view &path)
try
{
	return filesystem::is_directory(_path(path));
}
catch(const filesystem::filesystem_error &e)
{
	throw error{e};
}

bool
ircd::fs::exists(const string_view &path)
try
{
	return filesystem::exists(_path(path));
}
catch(const filesystem::filesystem_error &e)
{
	throw error{e};
}

///////////////////////////////////////////////////////////////////////////////
//
// fs/stdin.h
//

ircd::string_view
ircd::fs::stdin::readline(const mutable_buffer &buf)
try
{
	boost::asio::posix::stream_descriptor fd
	{
		ios::get(), dup(STDIN_FILENO)
	};

	boost::asio::streambuf sb
	{
		size(buf)
	};

	const auto interruption{[&fd]
	(ctx::ctx *const &interruptor)
	{
		fd.cancel();
	}};

	size_t len; continuation
	{
		continuation::asio_predicate, interruption, [&len, &fd, &sb]
		(auto &yield)
		{
			len = boost::asio::async_read_until(fd, sb, '\n', yield);
		}
	};

	std::istream is{&sb};
	is.get(data(buf), size(buf), '\n');
	return string_view
	{
		data(buf), size_t(is.gcount())
	};
}
catch(boost::system::system_error &e)
{
	throw_system_error(e.code());
	__builtin_unreachable();
}

//
// tty
//

ircd::fs::stdin::tty::tty()
:fd{[]
{
	thread_local char buf[256];
	syscall(::ttyname_r, STDIN_FILENO, buf, sizeof(buf));
	return fd
	{
		string_view{buf}, std::ios_base::out
	};
}()}
{
}

size_t
ircd::fs::stdin::tty::write(const string_view &buf)
{
	return syscall(::write, int(*this), buf.data(), buf.size());
}

///////////////////////////////////////////////////////////////////////////////
//
// fs/sync.h
//

ircd::fs::sync_opts
const ircd::fs::sync_opts_default;

void
ircd::fs::sync(const fd &fd,
               const off_t &offset,
               const size_t &length,
               const sync_opts &opts)
{
	return sync(fd, opts);
}

void
ircd::fs::sync(const fd &fd,
               const sync_opts &opts)
{
	assert(opts.op == op::SYNC);
	const prof::syscall_usage_warning message
	{
		"fs::sync(fd:%d)", int(fd)
	};

	#ifdef __linux__
		syscall(::syncfs, fd);
	#else
		syscall(::sync);
	#endif
}

void
ircd::fs::flush(const fd &fd,
                const off_t &offset,
                const size_t &length,
                const sync_opts &opts)
{
	return flush(fd, opts);
}

void
ircd::fs::flush(const fd &fd,
                const sync_opts &opts)
{
	assert(opts.op == op::SYNC);

	#ifdef IRCD_USE_IOU
	if(iou::system && opts.aio)
		return iou::fsync(fd, opts);
	#endif

	#ifdef IRCD_USE_AIO
	if(aio::system && opts.aio)
	{
		if(support::aio_fdsync && !opts.metadata)
			return aio::fsync(fd, opts);

		if(support::aio_fsync && opts.metadata)
			return aio::fsync(fd, opts);
	}
	#endif

	const prof::syscall_usage_warning message
	{
		"fs::flush(fd:%d, {metadata:%b aio:%b:%b})",
		int(fd),
		opts.metadata,
		opts.aio,
		opts.metadata? support::aio_fsync : support::aio_fdsync
	};

	if(!opts.metadata)
		return void(syscall(::fdatasync, fd));

	return void(syscall(::fsync, fd));
}

///////////////////////////////////////////////////////////////////////////////
//
// fs/read.h
//

namespace ircd::fs
{
	static bool fincore(void *const &map, const size_t &map_size, uint8_t *const &vec, const size_t &vec_size);
}

ircd::fs::read_opts
const ircd::fs::read_opts_default
{};

size_t
ircd::fs::prefetch(const fd &fd,
                   const size_t &count,
                   const read_opts &opts)
{
	#if defined(POSIX_FADV_WILLNEED)
		return advise(fd, POSIX_FADV_WILLNEED, count, opts);
	#else
		return 0UL;
	#endif
}

bool
ircd::fs::fincore(const fd &fd,
                  const size_t &count,
                  const read_opts &opts)
{
	assert(opts.offset % info::page_size == 0);
	const size_t &map_size
	{
		count?: size(fd)
	};

	void *const &map
	{
		::mmap(nullptr, map_size, PROT_NONE, MAP_NONBLOCK | MAP_SHARED, int(fd), opts.offset)
	};

	if(unlikely(map == MAP_FAILED))
		throw_system_error(errno);

	const custom_ptr<void> map_ptr
	{
		map, [&map_size](void *const &map)
		{
			syscall(::munmap, map, map_size);
		}
	};

	using word_t = unsigned long long;
	thread_local std::array<word_t, 64> tls_vec;

	const size_t vec_size
	{
		std::max(((map_size + info::page_size - 1) / info::page_size) / sizeof(word_t), 1UL)
	};

	assert(vec_size > 0 && vec_size < map_size);
	const auto dynamic_vec_size
	{
		vec_size > tls_vec.size()? vec_size : 0UL
	};

	std::vector<word_t> dynamic_vec(dynamic_vec_size);
	auto *const vec(dynamic_vec_size? dynamic_vec.data(): tls_vec.data());
	return fincore(map, map_size, reinterpret_cast<uint8_t *>(vec), vec_size);
}

bool
ircd::fs::fincore(void *const &map,
                  const size_t &map_size,
                  uint8_t *const &vec,
                  const size_t &vec_size)
{
	syscall(::mincore, map, map_size, vec);
	return std::find(vec, vec + vec_size, 0UL) == vec + vec_size;
}

std::string
ircd::fs::read(const string_view &path,
               const read_opts &opts)
{
	const fd fd
	{
		path
	};

	return read(fd, opts);
}

std::string
ircd::fs::read(const fd &fd,
               const read_opts &opts)
{
	return string(size(fd), [&fd, &opts]
	(const mutable_buffer &buf)
	{
		return read(fd, buf, opts);
	});
}

ircd::const_buffer
ircd::fs::read(const string_view &path,
               const mutable_buffer &buf,
               const read_opts &opts)
{
	const mutable_buffers bufs
	{
		&buf, 1
	};

	return mutable_buffer
	{
		data(buf), read(path, bufs, opts)
	};
}

ircd::const_buffer
ircd::fs::read(const fd &fd,
               const mutable_buffer &buf,
               const read_opts &opts)
{
	const mutable_buffers bufs
	{
		&buf, 1
	};

	return mutable_buffer
	{
		data(buf), read(fd, bufs, opts)
	};
}

size_t
ircd::fs::read(const string_view &path,
               const mutable_buffers &bufs,
               const read_opts &opts)
{
	const fd fd
	{
		path
	};

	return read(fd, bufs, opts);
}

namespace ircd::fs
{
	static int flags(const read_opts &opts);
	static size_t _read_preadv2(const fd &, const const_iovec_view &, const read_opts &);
	static size_t _read_preadv(const fd &, const const_iovec_view &, const read_opts &);
	static size_t read(const fd &, const const_iovec_view &, const read_opts &);
}

/// Read from file descriptor fd into buffers. The number of bytes read into
/// the buffers is returned. By default (via read_opts.all) this call will
/// loop internally until the buffers are full or EOF. To allow for a partial
/// read(), disable read_opts.all. Note that to maintain alignments (i.e when
/// direct-io or for special files read_opts.all must be false). By default
/// (via read_opts.interruptible) this call can throw if the syscall was
/// interrupted before reading any bytes.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstack-usage="
size_t
__attribute__((stack_protect))
ircd::fs::read(const fd &fd,
               const mutable_buffers &bufs,
               const read_opts &opts_)
{
	if(unlikely(bufs.size() > info::iov_max))
		throw error
		{
			make_error_code(std::errc::invalid_argument),
			"Buffer count of %zu exceeds IOV_MAX of %zu",
			bufs.size(),
			info::iov_max
		};

	size_t ret(0);
	read_opts opts(opts_);
	assert(bufs.size() <= info::iov_max);
	struct ::iovec iovbuf[bufs.size()]; do
	{
		assert(opts.offset >= opts_.offset);
		const size_t off(opts.offset - opts_.offset);
		assert(off <= buffers::size(bufs));
		assert(ret <= buffers::size(bufs));
		const auto iov
		{
			make_iov({iovbuf, bufs.size()}, bufs, ret)
		};

		const size_t last
		{
			read(fd, iov, opts)
		};

		if(!opts_.blocking && !last)
			break;

		ret += last;
		if(!opts_.all)
			break;

		if(off >= ret)
			break;

		opts.offset = opts_.offset + ret;
	}
	while(ret < buffers::size(bufs));
	assert(opts.offset >= opts_.offset);
	assert(ret <= buffers::size(bufs));
	return ret;
}
#pragma GCC diagnostic pop

/// Lowest-level'ish read() call. This call only conducts a single operation
/// (no looping) and can return a partial read(). It does have branches
/// for various read_opts. The arguments involve `struct ::iovec` which
/// we do not expose to the ircd.h API; thus this function is internal to
/// ircd::fs. There is no reason to use this function in lieu of the public
/// fs::read() suite.
size_t
ircd::fs::read(const fd &fd,
               const const_iovec_view &iov,
               const read_opts &opts)
{
	assert(opts.op == op::READ);

	#ifdef IRCD_USE_IOU
	if(likely(iou::system && opts.aio))
		return iou::read(fd, iov, opts);
	#endif

	#ifdef IRCD_USE_AIO
	if(likely(aio::system && opts.aio))
		return aio::read(fd, iov, opts);
	#endif

	#ifdef HAVE_PREADV2
	return support::preadv2?
		_read_preadv2(fd, iov, opts):
		_read_preadv(fd, iov, opts);
	#else
	return _read_preadv(fd, iov, opts);
	#endif
}

size_t
ircd::fs::_read_preadv(const fd &fd,
                       const const_iovec_view &iov,
                       const read_opts &opts)
{
	ssize_t ret; do
	{
		ret = ::preadv(int(fd), iov.data(), iov.size(), opts.offset);
	}
	while(!opts.interruptible && unlikely(ret == -1 && errno == EINTR));

	static_assert(EAGAIN == EWOULDBLOCK);
	if(unlikely(!opts.blocking && ret == -1 && errno == EAGAIN))
		return 0UL;

	if(unlikely(ret == -1))
		throw std::system_error
		{
			errno, std::system_category()
		};

	return ret;
}

#ifdef HAVE_PREADV2
size_t
ircd::fs::_read_preadv2(const fd &fd,
                        const const_iovec_view &iov,
                        const read_opts &opts)
{
	const auto &flags_
	{
		flags(opts)
	};

	ssize_t ret; do
	{
		ret = ::preadv2(int(fd), iov.data(), iov.size(), opts.offset, flags_);
	}
	while(!opts.interruptible && unlikely(ret == -1 && errno == EINTR));

	static_assert(EAGAIN == EWOULDBLOCK);
	if(!opts.blocking && ret == -1 && errno == EAGAIN)
		return 0UL;

	if(unlikely(ret == -1))
		throw std::system_error
		{
			errno, std::system_category()
		};

	return ret;
}
#endif HAVE_PREADV2

int
ircd::fs::flags(const read_opts &opts)
{
	int ret{0};

	#if defined(RWF_HIPRI)
	if(support::hipri && reqprio(opts.priority) == reqprio(opts::highest_priority))
		ret |= RWF_HIPRI;
	#endif

	#if defined(RWF_NOWAIT)
	if(support::nowait && !opts.blocking)
		ret |= RWF_NOWAIT;
	#endif

	return ret;
}

///////////////////////////////////////////////////////////////////////////////
//
// fs/write.h
//

ircd::fs::write_opts
const ircd::fs::write_opts_default
{};

void
ircd::fs::allocate(const fd &fd,
                   const size_t &size,
                   const write_opts &opts)
{
	assert(opts.op == op::WRITE);

	int mode{0};

	#ifdef FALLOC_FL_KEEP_SIZE
	mode |= opts.keep_size? FALLOC_FL_KEEP_SIZE : 0;
	#else
	if(opts.keep_size)
		throw_system_error(std::errc::invalid_argument);
	#endif

	#ifdef FALLOC_FL_PUNCH_HOLE
	mode |= opts.punch_hole? FALLOC_FL_PUNCH_HOLE : 0;
	#else
	if(opts.punch_hole)
		throw_system_error(std::errc::invalid_argument);
	#endif

	syscall(::fallocate, fd, mode, opts.offset, size);
}

void
ircd::fs::truncate(const string_view &path,
                   const size_t &size,
                   const write_opts &opts)
{
	const fd fd
	{
		path, std::ios::out | std::ios::trunc
	};

	return truncate(fd, size, opts);
}

void
ircd::fs::truncate(const fd &fd,
                   const size_t &size,
                   const write_opts &opts)
{
	assert(opts.op == op::WRITE);
	syscall(::ftruncate, fd, size);
}

ircd::const_buffer
ircd::fs::overwrite(const string_view &path,
                    const const_buffer &buf,
                    const write_opts &opts)
{
	const const_buffers bufs
	{
		&buf, 1
	};

	return const_buffer
	{
		data(buf), overwrite(path, bufs, opts)
	};
}

ircd::const_buffer
ircd::fs::overwrite(const fd &fd,
                    const const_buffer &buf,
                    const write_opts &opts)
{
	const const_buffers bufs
	{
		&buf, 1
	};

	return const_buffer
	{
		data(buf), overwrite(fd, bufs, opts)
	};
}

size_t
ircd::fs::overwrite(const string_view &path,
                    const const_buffers &bufs,
                    const write_opts &opts)
{
	const fd fd
	{
		path, std::ios::out | std::ios::trunc
	};

	return overwrite(fd, bufs, opts);
}

size_t
ircd::fs::overwrite(const fd &fd,
                    const const_buffers &bufs,
                    const write_opts &opts)
{
	return write(fd, bufs, opts);
}

//
// append
//

ircd::const_buffer
ircd::fs::append(const string_view &path,
                 const const_buffer &buf,
                 const write_opts &opts)
{
	const const_buffers bufs
	{
		&buf, 1
	};

	return const_buffer
	{
		data(buf), append(path, bufs, opts)
	};
}

ircd::const_buffer
ircd::fs::append(const fd &fd,
                 const const_buffer &buf,
                 const write_opts &opts)
{
	const const_buffers bufs
	{
		&buf, 1
	};

	return const_buffer
	{
		data(buf), append(fd, bufs, opts)
	};
}

size_t
ircd::fs::append(const string_view &path,
                 const const_buffers &bufs,
                 const write_opts &opts)
{
	const fd fd
	{
		path, std::ios::out | std::ios::app
	};

	return append(fd, bufs, opts);
}

size_t
ircd::fs::append(const fd &fd,
                 const const_buffers &bufs,
                 const write_opts &opts_)
{
	auto opts(opts_);
	if(support::pwritev2 && support::append)
		opts.offset = -1;
	else if(!opts.offset || opts.offset == -1)
		opts.offset = syscall(::lseek, fd, 0, SEEK_END);

	return write(fd, bufs, opts);
}

//
// write
//

ircd::const_buffer
ircd::fs::write(const string_view &path,
                const const_buffer &buf,
                const write_opts &opts)
{
	const const_buffers bufs
	{
		&buf, 1
	};

	return const_buffer
	{
		data(buf), write(path, bufs, opts)
	};
}

ircd::const_buffer
ircd::fs::write(const fd &fd,
                const const_buffer &buf,
                const write_opts &opts)
{
	const const_buffers bufs
	{
		&buf, 1
	};

	return const_buffer
	{
		data(buf), write(fd, bufs, opts)
	};
}

size_t
ircd::fs::write(const string_view &path,
                const const_buffers &bufs,
                const write_opts &opts)
{
	const fd fd
	{
		path, std::ios::out
	};

	return write(fd, bufs, opts);
}

namespace ircd::fs
{
	static int flags(const write_opts &opts);
	static size_t _write_pwritev2(const fd &, const const_iovec_view &, const write_opts &);
	static size_t _write_pwritev(const fd &, const const_iovec_view &, const write_opts &);
	static size_t write(const fd &, const const_iovec_view &, const write_opts &);
}

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstack-usage="
size_t
__attribute__((stack_protect))
ircd::fs::write(const fd &fd,
                const const_buffers &bufs,
                const write_opts &opts_)
{
	if(unlikely(bufs.size() > info::iov_max))
		throw error
		{
			make_error_code(std::errc::invalid_argument),
			"Buffer count of %zu exceeds IOV_MAX of %zu",
			bufs.size(),
			info::iov_max
		};

	size_t off(0);
	write_opts opts(opts_);
	assert(bufs.size() <= info::iov_max);
	struct ::iovec iovbuf[bufs.size()]; do
	{
		const auto iov
		{
			make_iov({iovbuf, bufs.size()}, bufs, off)
		};

		const size_t last
		{
			write(fd, iov, opts)
		};

		opts.offset += last;
		assert(opts.offset >= opts_.offset);
		off = opts.offset - opts_.offset;
		if(!opts.blocking && !last)
			break;
	}
	while(opts.all && opts_.offset >= 0 && off < buffers::size(bufs));
	assert(opts.offset >= opts_.offset);
	assert(ssize_t(off) == opts.offset - opts_.offset);
	assert(!opts.all || !opts.blocking || off == buffers::size(bufs));
	return off;
}
#pragma GCC diagnostic pop

/// Lowest-level'ish write() call. This call only conducts a single operation
/// (no looping) and can return early with a partial write(). It does have
/// branches for various write_opts. The arguments involve `struct ::iovec`
/// which we do not expose to the ircd.h API; thus this function is internal to
/// ircd::fs. There is no reason to use this function in lieu of the public
/// fs::read() suite.
size_t
ircd::fs::write(const fd &fd,
                const const_iovec_view &iov,
                const write_opts &opts)
{
	assert(opts.op == op::WRITE);

	#ifdef IRCD_USE_IOU
	if(likely(iou::system && opts.aio))
		return iou::write(fd, iov, opts);
	#endif

	#ifdef IRCD_USE_AIO
	if(likely(aio::system && opts.aio))
		return aio::write(fd, iov, opts);
	#endif

	#ifdef HAVE_PWRITEV2
	return support::pwritev2?
		_write_pwritev2(fd, iov, opts):
		_write_pwritev(fd, iov, opts);
	#else
	return _write_pwritev(fd, iov, opts);
	#endif
}

size_t
ircd::fs::_write_pwritev(const fd &fd,
                         const const_iovec_view &iov,
                         const write_opts &opts)
{
	ssize_t ret; do
	{
		ret = ::pwritev(int(fd), iov.data(), iov.size(), opts.offset);
	}
	while(!opts.interruptible && unlikely(ret == -1 && errno == EINTR));

	static_assert(EAGAIN == EWOULDBLOCK);
	if(unlikely(!opts.blocking && ret == -1 && errno == EAGAIN))
		return 0UL;

	if(unlikely(ret == -1))
		throw std::system_error
		{
			errno, std::system_category()
		};

	return ret;
}

#ifdef HAVE_PWRITEV2
size_t
ircd::fs::_write_pwritev2(const fd &fd,
                          const const_iovec_view &iov,
                          const write_opts &opts)
{
	// Manpages sez that when appending with RWF_APPEND, the offset has no
	// effect on the write; but if the value of the offset is -1 then the
	// fd's offset is updated, otherwise it is not.
	const off_t &offset
	{
		opts.offset == -1 && !opts.update_offset? 0 : opts.offset
	};

	ssize_t ret; do
	{
		ret = ::pwritev2(int(fd), iov.data(), iov.size(), opts.offset, flags(opts));
	}
	while(!opts.interruptible && unlikely(ret == -1 && errno == EINTR));

	static_assert(EAGAIN == EWOULDBLOCK);
	if(!opts.blocking && ret == -1 && errno == EAGAIN)
		return 0UL;

	if(unlikely(ret == -1))
		throw std::system_error
		{
			errno, std::system_category()
		};

	return ret;
}
#endif HAVE_PWRITEV2

int
ircd::fs::flags(const write_opts &opts)
{
	int ret{0};

	#if defined(RWF_APPEND)
	assert(opts.offset >= 0 || support::append);
	if(support::append && opts.offset == -1)
		ret |= RWF_APPEND;
	#endif

	#if defined(RWF_HIPRI)
	if(support::hipri && reqprio(opts.priority) == reqprio(opts::highest_priority))
		ret |= RWF_HIPRI;
	#endif

	#if defined(RWF_NOWAIT)
	if(support::nowait && !opts.blocking)
		ret |= RWF_NOWAIT;
	#endif

	#if defined(RWF_DSYNC)
	if(support::dsync && opts.sync && !opts.metadata)
		ret |= RWF_DSYNC;
	#endif

	#if defined(RWF_SYNC)
	if(support::sync && opts.sync && opts.metadata)
		ret |= RWF_SYNC;
	#endif

	#ifdef RWF_WRITE_LIFE_SHIFT
	if(support::rwf_write_life && opts.write_life)
		ret |= (opts.write_life << (RWF_WRITE_LIFE_SHIFT));
	#endif

	return ret;
}

///////////////////////////////////////////////////////////////////////////////
//
// fs/wait.h
//

namespace ircd::fs
{
	static constexpr asio::posix::stream_descriptor::wait_type translate(const ready &) noexcept
	__attribute__((const));
}

decltype(ircd::fs::wait_opts_default)
ircd::fs::wait_opts_default;

void
ircd::fs::wait(const fd &fd,
               const wait_opts &opts)
{
	assert(opts.op == op::WAIT);

	const auto &wait_type
	{
		translate(opts.ready)
	};

	boost::asio::posix::stream_descriptor sd
	{
		ios::get(), int(fd)
	};

	const unwind release{[&sd]
	{
		sd.release();
	}};

	const auto interruption{[&sd]
	(ctx::ctx *const &interruptor)
	{
		sd.cancel();
	}};

	boost::system::error_code ec; continuation
	{
		continuation::asio_predicate, interruption, [&wait_type, &sd, &ec]
		(auto &yield)
		{
			sd.async_wait(wait_type, yield[ec]);
		}
	};

	if(unlikely(ec))
		throw_system_error(ec);
}

constexpr
boost::asio::posix::stream_descriptor::wait_type
ircd::fs::translate(const ready &ready)
noexcept
{
	using wait_type = boost::asio::posix::stream_descriptor::wait_type;

	switch(ready)
	{
		case ready::ANY:
			return wait_type::wait_read | wait_type::wait_write | wait_type::wait_error;

		case ready::READ:
			return wait_type::wait_read;

		case ready::WRITE:
			return wait_type::wait_write;

		case ready::ERROR:
		default:
			return wait_type::wait_error;
	}
}

ircd::string_view
ircd::fs::reflect(const ready &ready)
{
	switch(ready)
	{
		case ready::ANY:      return "ANY";
		case ready::READ:     return "READ";
		case ready::WRITE:    return "WRITE";
		case ready::ERROR:    return "ERROR";
	}

	return "?????";
}

///////////////////////////////////////////////////////////////////////////////
//
// fs/aio.h
//

decltype(ircd::fs::aio::MAX_REQPRIO)
ircd::fs::aio::MAX_REQPRIO
{
	info::aio_reqprio_max
};

/// Conf item to control whether aio is enabled or bypassed.
decltype(ircd::fs::aio::enable)
ircd::fs::aio::enable
{
	{ "name",     "ircd.fs.aio.enable"  },
	{ "default",  true                  },
	{ "persist",  false                 },
};

/// Global stats structure
decltype(ircd::fs::aio::stats)
ircd::fs::aio::stats;

/// Non-null when aio is available for use
decltype(ircd::fs::aio::system)
ircd::fs::aio::system;

//
// init
//

#ifndef IRCD_USE_AIO
[[gnu::weak]]
ircd::fs::aio::init::init()
{
	assert(!system);
}
#endif

#ifndef IRCD_USE_AIO
[[gnu::weak]]
ircd::fs::aio::init::~init()
noexcept
{
	assert(!system);
}
#endif

///////////////////////////////////////////////////////////////////////////////
//
// fs/iou.h
//

decltype(ircd::fs::iou::support)
ircd::fs::iou::support
{
	#ifdef IRCD_USE_IOU
		info::kernel_version[0] > 5 ||
		(info::kernel_version[0] >= 5 && info::kernel_version[1] >= 1)
	#else
		false
	#endif
};

/// Conf item to control whether iou is enabled or bypassed.
decltype(ircd::fs::iou::enable)
ircd::fs::iou::enable
{
	{ "name",     "ircd.fs.iou.enable"  },
	{ "default",  false                 },
	{ "persist",  false                 },
};

/// Global stats structure
decltype(ircd::fs::iou::stats)
ircd::fs::iou::stats;

/// Non-null when iou is available for use
decltype(ircd::fs::iou::system)
ircd::fs::iou::system;

//
// init
//

#ifndef IRCD_USE_IOU
[[gnu::weak]]
ircd::fs::iou::init::init()
{
	assert(!system);
}
#endif

#ifndef IRCD_USE_IOU
[[gnu::weak]]
ircd::fs::iou::init::~init()
noexcept
{
	assert(!system);
}
#endif

///////////////////////////////////////////////////////////////////////////////
//
// fs/fd.h
//

namespace ircd::fs
{
	static uint posix_flags(const std::ios::openmode &mode);
	static long pathconf(const fd &, const int &arg);
}

decltype(ircd::fs::fd::opts::direct_io_enable)
ircd::fs::fd::opts::direct_io_enable
{
	{ "name",     "ircd.fs.fd.direct_io_enable"  },
	{ "default",  true                           },
	{ "persist",  false                          },
};

#if defined(POSIX_FADV_DONTNEED)
size_t
ircd::fs::evict(const fd &fd,
                const size_t &count,
                const opts &opts)
{
	return advise(fd, POSIX_FADV_DONTNEED, count, opts);
}
#else
#warning "POSIX_FADV_DONTNEED not available on this platform."
size_t
ircd::fs::evict(const fd &fd,
                const size_t &count,
                const opts &opts)
{
	return 0UL;
}
#endif

#if defined(HAVE_POSIX_FADVISE)
size_t
ircd::fs::advise(const fd &fd,
                 const int &advice,
                 const size_t &count,
                 const opts &opts)
{
	static const size_t max_count
	{
		128_KiB
	};

	size_t i(0), off, cnt; do
	{
		off = opts.offset + max_count * i++;
		cnt = std::min(opts.offset + count - off, max_count);
		switch(const auto r(::posix_fadvise(fd, off, cnt, advice)); r)
		{
			case 0:   break;
			default:  throw_system_error(r);
		}
	}
	while(off + cnt < opts.offset + count);
	return count;
}
#else
#warning "posix_fadvise(2) not available for this compilation."
size_t
ircd::fs::advise(const fd &fd,
                 const int &advice,
                 const size_t &count,
                 const opts &opts)
{
	return 0UL;
}
#endif

#if defined(HAVE_FCNTL_H) && defined(F_SET_FILE_RW_HINT)
void
ircd::fs::write_life(const fd &fd,
                     const uint64_t &hint)
{
	if(!support::rwh_write_life)
		return;

	syscall(::fcntl, int(fd), F_SET_FILE_RW_HINT, &hint);
}
#else
#warning "F_SET_FILE_RW_HINT not supported on platform."
void
ircd::fs::write_life(const fd &fd,
                     const uint64_t &hint)
{
}
#endif

#if defined(HAVE_FCNTL_H) && defined(F_GET_FILE_RW_HINT)
uint64_t
ircd::fs::write_life(const fd &fd)
noexcept try
{
	uint64_t ret;
	syscall(::fcntl, int(fd), F_GET_FILE_RW_HINT, &ret);
	return ret;
}
catch(const std::system_error &e)
{
	log::derror
	{
		log, "fcntl(F_GET_FILE_RW_HINT) fd:%d :%s",
		int(fd),
		e.what()
	};

	return 0;
}
#else
#warning "F_GET_FILE_RW_HINT not supported on platform."
uint64_t
ircd::fs::write_life(const fd &fd)
{
	return 0UL;
}
#endif

#ifdef HAVE_SYS_STAT_H
ulong
ircd::fs::device(const fd &fd)
{
	struct stat st{0};
	syscall(::fstat, fd, &st);
	return st.st_dev;
}
#else
ulong
ircd::fs::device(const fd &fd)
{
	static_assert
	(
		0, "Please implement this definition"
	)
}
#endif

#ifdef HAVE_SYS_STATFS_H
ulong
ircd::fs::fstype(const fd &fd)
{
	struct statfs f{0};
	syscall(::fstatfs, fd, &f);
	return f.f_type;
}
#else
ulong
ircd::fs::fstype(const fd &fd)
{
	static_assert
	(
		0, "Please implement this definition"
	)
}
#endif

#ifdef __linux__
size_t
ircd::fs::block_size(const fd &fd)
{
	return 512UL;
}
#elif defined(HAVE_SYS_STAT_H)
size_t
ircd::fs::block_size(const fd &fd)
{
	struct stat st;
	syscall(::fstat, fd, &st);
	return st.st_blksize;
}
#else
size_t
ircd::fs::block_size(const fd &fd)
{
	return info::page_size;
}
#endif

long
ircd::fs::pathconf(const fd &fd,
                   const int &arg)
{
	return syscall(::fpathconf, fd, arg);
}

size_t
ircd::fs::size(const fd &fd)
{
	const off_t cur
	{
		syscall(::lseek, fd, 0, SEEK_CUR)
	};

	const off_t end
	{
		syscall(::lseek, fd, 0, SEEK_END)
	};

	syscall(::lseek, fd, cur, SEEK_SET);
	return end;
}

//
// fd::opts
//

ircd::fs::fd::opts::opts(const std::ios::openmode &mode)
:mode
{
	mode
}
,flags
{
	posix_flags(mode)
}
,mask
{
	flags & O_CREAT?
		S_IRUSR | S_IWUSR:
		0U
}
,ate
{
	bool(mode & std::ios::ate)
}
{
}

//
// fd::fd
//

ircd::fs::fd::fd(const int &fdno)
:fdno{fdno}
{
}

ircd::fs::fd::fd(const string_view &path)
:fd{path, opts{}}
{
}

ircd::fs::fd::fd(const string_view &path,
                 const opts &opts)
:fdno{[&path, &opts]
() -> int
{
	const prof::syscall_usage_warning message
	{
		"fs::fs::fd(): open(2): %s", path
	};

	uint flags(opts.flags);
	flags |= opts.direct? O_DIRECT : 0UL;
	flags |= opts.cloexec? O_CLOEXEC : 0UL;
	flags &= opts.nocreate? ~O_CREAT : flags;
	flags |= !opts.blocking? O_NONBLOCK : 0UL;

	const mode_t &mode(opts.mask);
	assert((flags & ~O_CREAT) || mode != 0);

	return syscall(::open, path_cstr(path), flags, mode);
}()}
{
	if(opts.ate)
		syscall(::lseek, fdno, 0, SEEK_END);
}

ircd::fs::fd::fd(fd &&o)
noexcept
:fdno
{
	std::move(o.fdno)
}
{
	o.fdno = -1;
}

ircd::fs::fd &
ircd::fs::fd::operator=(fd &&o)
noexcept
{
	this->~fd();
	fdno = std::move(o.fdno);
	o.fdno = -1;
	return *this;
}

ircd::fs::fd::~fd()
noexcept
{
	if(likely(fdno >= 0)) try
	{
		syscall(::close, fdno);
	}
	catch(const std::exception &e)
	{
		log::critical
		{
			"Failed to close fd:%d :%s",
			fdno,
			e.what()
		};
	}
}

int
ircd::fs::fd::release()
noexcept
{
	const int fdno(this->fdno);
	this->fdno = -1;
	return fdno;
}

ircd::fs::fd::opts
ircd::fs::fd::options()
const
{
	opts ret;
	ret.flags = syscall(::fcntl, int(*this), F_GETFL, 0);

	if((ret.flags & O_RDONLY) == O_RDONLY)
		ret.mode = std::ios::in;

	if((ret.flags & O_WRONLY) == O_WRONLY)
		ret.mode = std::ios::out;

	if((ret.flags & O_RDWR) == O_RDWR)
		ret.mode = std::ios::in | std::ios::out;

	ret.direct = ret.flags & O_DIRECT;
	ret.cloexec = ret.flags & O_CLOEXEC;
	ret.nocreate = ~ret.flags & O_CREAT;
	ret.blocking = ret.flags & O_NONBLOCK;
	return ret;
}

///////////////////////////////////////////////////////////////////////////////
//
// fs/device.h
//

#ifdef __linux__
ircd::string_view
ircd::fs::dev::sysfs(const mutable_buffer &out,
                     const ulong &id,
                     const string_view &relpath)
{
	const string_view path{fmt::sprintf
	{
		path_scratch, "/sys/dev/block/%s/%s",
		sysfs_id(name_scratch, id),
		relpath
	}};

	fs::read_opts opts;
	opts.aio = false;
	return fs::read(path, out, opts);
}
#else
ircd::string_view
ircd::fs::dev::sysfs(const mutable_buffer &out,
                     const ulong &id,
                     const string_view &relpath)
{
	throw panic
	{
		"sysfs(5) is not available."
	};
}
#endif

ircd::string_view
ircd::fs::dev::sysfs_id(const mutable_buffer &out,
                        const ulong &id)
{
	return sysfs_id(out, dev::id(id));
}

ircd::string_view
ircd::fs::dev::sysfs_id(const mutable_buffer &out,
                        const major_minor &id)
{
	return fmt::sprintf
	{
		out, "%lu:%lu", id.first, id.second
	};
}

ulong
ircd::fs::dev::id(const major_minor &id)
{
	return makedev(id.first, id.second);
}

ircd::fs::dev::major_minor
ircd::fs::dev::id(const ulong &id)
{
	return
	{
		major(id), minor(id)
	};
}

///////////////////////////////////////////////////////////////////////////////
//
// fs/opts.h
//

decltype(ircd::fs::opts_default)
ircd::fs::opts_default
{};

decltype(ircd::fs::opts::highest_priority)
ircd::fs::opts::highest_priority
{
	std::numeric_limits<decltype(priority)>::min()
};

///////////////////////////////////////////////////////////////////////////////
//
// fs/op.h
//

ircd::string_view
ircd::fs::reflect(const op &op)
{
	switch(op)
	{
		case op::NOOP:    return "NOOP";
		case op::READ:    return "READ";
		case op::WRITE:   return "WRITE";
		case op::SYNC:    return "SYNC";
		case op::WAIT:    return "WAIT";
	}

	return "????";
}

#ifndef IRCD_USE_AIO
[[gnu::weak]]
ircd::fs::op
ircd::fs::aio::translate(const int &val)
{
	return op::NOOP;
}
#endif

#ifndef IRCD_USE_IOU
[[gnu::weak]]
ircd::fs::op
ircd::fs::iou::translate(const int &val)
{
	return op::NOOP;
}
#endif

///////////////////////////////////////////////////////////////////////////////
//
// fs/iov.h
//

ircd::fs::const_iovec_view
ircd::fs::make_iov(const iovec_view &iov,
                   const mutable_buffers &bufs,
                   const size_t &offset)
{
	assert(offset <= buffers::size(bufs));
	const size_t max
	{
		std::min(iov.size(), bufs.size())
	};

	size_t i(0), off(0);
	for(; i < max; off += size(bufs[i++]))
		if(size(bufs[i]) >= offset - off)
		{
			assert(offset >= off);
			off = offset - off;
			break;
		}

	assert(i <= max);
	if(i < max)
	{
		assert(off <= size(bufs[i]));
		iov.at(i) =
		{
			data(bufs[i]) + off, size(bufs[i]) - off
		};

		for(++i; i < max; ++i)
			iov.at(i) =
			{
				data(bufs[i]), size(bufs[i])
			};
	}

	assert(i <= max);
	const const_iovec_view ret{iov.data(), i};
	assert(bytes(ret) <= buffer::buffers::size(bufs));
	return ret;
}

ircd::fs::const_iovec_view
ircd::fs::make_iov(const iovec_view &iov,
                   const const_buffers &bufs,
                   const size_t &offset)
{
	assert(offset <= buffers::size(bufs));
	const size_t max
	{
		std::min(iov.size(), bufs.size())
	};

	size_t i(0), off(0);
	for(; i < max; off += size(bufs[i++]))
		if(size(bufs[i]) >= offset - off)
		{
			assert(offset >= off);
			off = offset - off;
			break;
		}

	assert(i <= max);
	if(i < max)
	{
		assert(off <= size(bufs[i]));
		iov.at(i) =
		{
			const_cast<char *>(data(bufs[i])) + off, size(bufs[i]) - off
		};

		for(++i; i < max; ++i)
			iov.at(i) =
			{
				const_cast<char *>(data(bufs[i])), size(bufs[i])
			};
	}

	assert(i <= max);
	const const_iovec_view ret{iov.data(), i};
	assert(bytes(ret) <= buffer::buffers::size(bufs));
	return ret;
}

size_t
ircd::fs::bytes(const const_iovec_view &iov)
{
	return std::accumulate(begin(iov), end(iov), size_t(0), []
	(auto ret, const auto &iov)
	{
		return ret += iov.iov_len;
	});
}

///////////////////////////////////////////////////////////////////////////////
//
// fs/error.h
//

std::string
ircd::string(const boost::filesystem::filesystem_error &e)
{
	return ircd::string(512, [&e]
	(const mutable_buffer &buf)
	{
		return string(buf, e);
	});
}

ircd::string_view
ircd::string(const mutable_buffer &buf,
             const boost::filesystem::filesystem_error &e)
{
	return fmt::sprintf
	{
		buf, "%s :%s", e.code().category().name(), e.what()
	};
}

std::system_error
ircd::make_system_error(const boost::filesystem::filesystem_error &e)
{
	return std::system_error
	{
		make_error_code(e), e.what()
	};
}

std::error_code
ircd::make_error_code(const boost::filesystem::filesystem_error &e)
{
	const boost::system::error_code &ec
	{
		e.code()
	};

	return make_error_code(ec);
}

//
// error::error
//

decltype(ircd::fs::error::buf) thread_local
ircd::fs::error::buf;

ircd::fs::error::error(const boost::filesystem::filesystem_error &e)
:std::system_error
{
	make_system_error(e)
}
{
}

///////////////////////////////////////////////////////////////////////////////
//
// Internal utils
//

uint
ircd::fs::posix_flags(const std::ios::openmode &mode)
{
	static const auto rdwr
	{
		std::ios::in | std::ios::out
	};

	uint ret{0};
	if((mode & rdwr) == rdwr)
		ret |= O_RDWR;
	else if(mode & std::ios::out)
		ret |= O_WRONLY;
	else
		ret |= O_RDONLY;

	ret |= mode & std::ios::trunc? O_TRUNC : 0;
	ret |= mode & std::ios::app? O_APPEND : 0;
	ret |= ret & O_WRONLY? O_CREAT : 0;
	ret |= ret & O_RDWR && ret & (O_TRUNC | O_APPEND)? O_CREAT : 0;
	return ret;
}

/// Translate an ircd::fs opts priority integer to an AIO priority integer.
/// The ircd::fs priority integer is like a nice value. The AIO value is
/// positive [0, MAX_REQPRIO]. This function takes an ircd::fs value and
/// shifts it to the AIO value.
int
ircd::fs::reqprio(int input)
noexcept
{
	const auto &max_reqprio
	{
		aio::MAX_REQPRIO
	};

	static const auto median
	{
		int(max_reqprio / 2)
	};

	input = std::max(input, 0 - median);
	input = std::min(input, median);
	input = max_reqprio - (input + median);
	assert(input >= 0 && input <= int(max_reqprio));
	return input;
}