diff --git a/include/ircd/db.h b/include/ircd/db.h index eafcf92e3..3c2450b0d 100644 --- a/include/ircd/db.h +++ b/include/ircd/db.h @@ -48,17 +48,72 @@ IRCD_EXCEPTION(error, try_again) std::string path(const std::string &name); +template +struct optval +:std::pair +{ + optval(const T &key, const ssize_t &val = std::numeric_limits::min()); +}; + +template +using optlist = std::initializer_list>; + +// Reads may be posted to a separate thread which incurs the time of IO while the calling +// ircd::context yields. +enum class get +{ + PIN, // Keep iter data in memory for iter lifetime (good for lots of ++/--) + CACHE, // Update the cache (CACHE is default for non-iterator operations) + NO_CACHE, // Do not update the cache (NO_CACHE is default for iterators) + NO_SNAPSHOT, // Snapshots provide consistent views for iteration. + NO_CHECKSUM, // Integrity of data will be checked unless this is specified + READAHEAD, // Pair with a size in bytes for prefetching additional data +}; + +struct gopts +:optlist +{ + template gopts(list&&... l): optlist{std::forward(l)...} {} +}; + +// Writes usually occur without yielding your context because the DB is write-log oriented. +enum class set +{ + FSYNC, // Uses kernel filesystem synchronization after write (slow) + NO_JOURNAL, // Write Ahead Log (WAL) for some crash recovery + MISSING_COLUMNS // No exception thrown when writing to a deleted column family +}; + +struct sopts +:optlist +{ + template sopts(list&&... l): optlist{std::forward(l)...} {} +}; + +enum class opt +{ + NO_CREATE, // A new database may be created (if none found) unless this is specified + NO_EXISTING, // An error is given if database already exists + NO_CHECKSUM, // (paranoid_checks) + NO_MADV_DONTNEED, // Never issue MADV_DONTNEED (on windows turns off all pagecaching!) + NO_MADV_RANDOM, // Skip MADV_RANDOM on database file opening + FALLOCATE, // Allow use of fallocate() + NO_FALLOCATE, // Disallow use fallocate() calls + NO_FDATASYNC, // Flushing is only ever directed by the kernel pagecache + FSYNC, // Use fsync() instead of fdatasync() + MMAP_READS, // mmap() table files for reading + MMAP_WRITES, // mmap() table files for writing (hinders db journal) + STATS_THREAD, // Stats collection etc related to DB threading (thread_track) + STATS_MALLOC, // Stats collection for memory allocation when applicable + OPEN_FAST, // Skips a lot of stuff to make opening a handle faster + OPEN_SMALL, // Optimizes the cache hierarchy for < 1GiB databases. + OPEN_BULKLOAD, // Optimizes the handle to accept a large amount of writes at once +}; + struct opts +:optlist { - bool create_if_missing = true; -}; - -struct read_opts -{ -}; - -struct write_opts -{ + template opts(list&&... l): optlist{std::forward(l)...} {} }; class handle @@ -70,11 +125,11 @@ class handle using char_closure = std::function; using string_closure = std::function; - bool has(const std::string &key, const read_opts & = {}); - void get(const std::string &key, const char_closure &, const read_opts & = {}); - void set(const std::string &key, const std::string &value, const write_opts & = {}); + bool has(const std::string &key, const gopts & = {}); + void get(const std::string &key, const char_closure &, const gopts & = {}); + void set(const std::string &key, const std::string &value, const sopts & = {}); - handle(const std::string &name, const opts &opts = {}); + handle(const std::string &name, const opts & = {}); ~handle() noexcept; }; @@ -86,3 +141,10 @@ struct init } // namespace db } // namespace ircd + +template +ircd::db::optval::optval(const T &key, + const ssize_t &val) +:std::pair{key, val} +{ +} diff --git a/include/ircd/db_meta.h b/include/ircd/db_meta.h deleted file mode 100644 index 83155e323..000000000 --- a/include/ircd/db_meta.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (C) 2016 Charybdis Development Team - * Copyright (C) 2016 Jason Volk - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice is present in all copies. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING - * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - */ - -#pragma once -#define HAVE_IRCD_DB_META_H - -#include - -namespace ircd { -namespace db { - -struct meta -{ - std::string name; - std::string path; - rocksdb::Options opts; - - meta(const std::string &name, const std::string &path, const struct opts &opts); -}; - -inline -meta::meta(const std::string &name, - const std::string &path, - const struct opts &opts) -:name{name} -,path{path} -,opts{[&opts] -{ - rocksdb::Options ret; - ret.create_if_missing = opts.create_if_missing; - return ret; -}()} -{ -} - -} // namespace db -} // namespace ircd diff --git a/ircd/db.cc b/ircd/db.cc index dd0fc1167..fc29e286f 100644 --- a/ircd/db.cc +++ b/ircd/db.cc @@ -21,12 +21,23 @@ */ #include -#include namespace ircd { namespace db { -using rocksdb::DB; +void throw_on_error(const rocksdb::Status &); + +rocksdb::WriteOptions make_opts(const sopts &); +rocksdb::ReadOptions make_opts(const gopts &); +rocksdb::Options make_opts(const opts &); + +struct meta +{ + std::string name; + std::string path; + rocksdb::Options opts; + std::shared_ptr cache; +}; namespace work { @@ -47,6 +58,12 @@ namespace work } void throw_on_error(const rocksdb::Status &); +rocksdb::Iterator &seek(rocksdb::Iterator &, const iter::op &); +template bool has_opt(const optlist &, const T &); +rocksdb::WriteOptions make_opts(const sopts &); +rocksdb::ReadOptions make_opts(const gopts &, const bool &iterator = false); +rocksdb::Options make_opts(const opts &); + void query(std::function); } // namespace db @@ -68,15 +85,42 @@ noexcept db::handle::handle(const std::string &name, const opts &opts) try -:meta{std::make_unique(name, path(name), opts)} -,d{[this, &name] +:meta{[&name, &opts] { - DB *ptr; - throw_on_error(DB::Open(meta->opts, path(name), &ptr)); - std::unique_ptr ret{ptr}; - return ret; + auto meta(std::make_unique()); + meta->name = name; + meta->path = path(name); + meta->opts = make_opts(opts); + meta->opts.row_cache = meta->cache; + return std::move(meta); +}()} +,d{[this] +{ + rocksdb::DB *ptr; + throw_on_error(rocksdb::DB::Open(meta->opts, meta->path, &ptr)); + return std::unique_ptr{ptr}; }()} { + log.info("Opened database \"%s\" @ `%s' (handle: %p)", + meta->name.c_str(), + meta->path.c_str(), + (const void *)this); +} +catch(const invalid_argument &e) +{ + const bool no_create(has_opt(opts, opt::NO_CREATE)); + const bool no_existing(has_opt(opts, opt::NO_EXISTING)); + const char *const helpstr + { + no_create? " (The database is missing and will not be created)": + no_existing? " (The database already exists but must be fresh)": + "" + }; + + throw error("Failed to open db '%s': %s%s", + name.c_str(), + e.what(), + helpstr); } catch(const std::exception &e) { @@ -93,30 +137,30 @@ noexcept void db::handle::set(const std::string &key, const std::string &value, - const write_opts &opts) + const sopts &sopts) { - using rocksdb::WriteOptions; using rocksdb::Slice; const Slice k(key.data(), key.size()); const Slice v(value.data(), value.size()); - throw_on_error(d->Put(WriteOptions(), k, v)); + + auto opts(make_opts(sopts)); + throw_on_error(d->Put(opts, k, v)); } void db::handle::get(const std::string &key, const char_closure &func, - const read_opts &opts) + const gopts &gopts) { - using rocksdb::ReadOptions; using rocksdb::Iterator; using rocksdb::Slice; - ReadOptions ropts; + auto opts(make_opts(gopts)); const Slice sk(key.data(), key.size()); - query([this, &sk, &func, &ropts] + query([this, &sk, &func, &opts] { - const std::unique_ptr it(d->NewIterator(ropts)); + const std::unique_ptr it(d->NewIterator(opts)); it->Seek(sk); throw_on_error(it->status()); @@ -128,30 +172,28 @@ db::handle::get(const std::string &key, bool db::handle::has(const std::string &key, - const read_opts &opts) + const gopts &gopts) { - using rocksdb::ReadOptions; - using rocksdb::Iterator; using rocksdb::Slice; + using rocksdb::Iterator; + using rocksdb::Status; bool ret; - ReadOptions ropts; + auto opts(make_opts(gopts)); const Slice k(key.data(), key.size()); - query([this, &k, &ret, &ropts] + query([this, &k, &ret, &opts] { - if(!d->KeyMayExist(ropts, k, nullptr, nullptr)) + if(!d->KeyMayExist(opts, k, nullptr, nullptr)) { ret = false; return; } - const std::unique_ptr it(d->NewIterator(ropts)); + const std::unique_ptr it(d->NewIterator(opts)); it->Seek(k); switch(it->status().code()) { - using rocksdb::Status; - case Status::kOk: ret = true; return; case Status::kNotFound: ret = false; return; default: @@ -168,7 +210,7 @@ db::query(std::function func) std::exception_ptr eptr; auto &context(ctx::cur()); std::atomic done{false}; - auto closure([func(std::move(func)), &eptr, &context, &done] + auto closure([&func, &eptr, &context, &done] () noexcept { try @@ -260,6 +302,164 @@ db::work::pop() return std::move(c); } +rocksdb::Options +db::make_opts(const opts &opts) +{ + rocksdb::Options ret; + ret.create_if_missing = true; // They default this to false, but we invert the option + + for(const auto &o : opts) switch(o.first) + { + case opt::NO_CREATE: + ret.create_if_missing = false; + continue; + + case opt::NO_EXISTING: + ret.error_if_exists = true; + continue; + + case opt::NO_CHECKSUM: + ret.paranoid_checks = false; + continue; + + case opt::NO_MADV_DONTNEED: + ret.allow_os_buffer = false; + continue; + + case opt::NO_MADV_RANDOM: + ret.advise_random_on_open = false; + continue; + + case opt::FALLOCATE: + ret.allow_fallocate = true; + continue; + + case opt::NO_FALLOCATE: + ret.allow_fallocate = false; + continue; + + case opt::NO_FDATASYNC: + ret.disableDataSync = true; + continue; + + case opt::FSYNC: + ret.use_fsync = true; + continue; + + case opt::MMAP_READS: + ret.allow_mmap_reads = true; + continue; + + case opt::MMAP_WRITES: + ret.allow_mmap_writes = true; + continue; + + case opt::STATS_THREAD: + ret.enable_thread_tracking = true; + continue; + + case opt::STATS_MALLOC: + ret.dump_malloc_stats = true; + continue; + + case opt::OPEN_FAST: + ret.skip_stats_update_on_db_open = true; + continue; + + case opt::OPEN_BULKLOAD: + ret.PrepareForBulkLoad(); + continue; + + case opt::OPEN_SMALL: + ret.OptimizeForSmallDb(); + continue; + + default: + continue; + } + + return ret; +} + +rocksdb::ReadOptions +db::make_opts(const gopts &opts, + const bool &iterator) +{ + rocksdb::ReadOptions ret; + + if(iterator) + ret.fill_cache = false; + + for(const auto &opt : opts) switch(opt.first) + { + case get::PIN: + ret.pin_data = true; + continue; + + case get::CACHE: + ret.fill_cache = true; + continue; + + case get::NO_CACHE: + ret.fill_cache = false; + continue; + + case get::NO_CHECKSUM: + ret.verify_checksums = false; + continue; + + case get::READAHEAD: + ret.readahead_size = opt.second; + continue; + + default: + continue; + } + + return ret; +} + +rocksdb::WriteOptions +db::make_opts(const sopts &opts) +{ + rocksdb::WriteOptions ret; + for(const auto &opt : opts) switch(opt.first) + { + case set::FSYNC: + ret.sync = true; + continue; + + case set::NO_JOURNAL: + ret.disableWAL = true; + continue; + + case set::MISSING_COLUMNS: + ret.ignore_missing_column_families = true; + continue; + + default: + continue; + } + + return ret; +} + +template +bool +db::has_opt(const optlist &list, + const T &opt) +{ + const auto check([&opt] + (const auto &pair) + { + return pair.first == opt; + }); + + return std::find_if(begin(list), end(list), check) != end(list); +} + +} + std::string db::path(const std::string &name) {