From e7844a0771d3a4df13f3e5f5fcc9184ccee025da Mon Sep 17 00:00:00 2001 From: Jason Volk Date: Sun, 2 Apr 2017 21:02:32 -0700 Subject: [PATCH] ircd::db: Database system developments. --- include/ircd/db.h | 40 +- include/ircd/db/cell.h | 126 +++ include/ircd/db/column.h | 112 ++- include/ircd/db/const_iterator.h | 83 -- include/ircd/db/database.h | 39 +- include/ircd/db/delta.h | 13 - include/ircd/db/object.h | 306 +++---- include/ircd/db/opts.h | 1 + include/ircd/db/row.h | 192 ++++- ircd/db.cc | 1311 ++++++++++++++++++++---------- 10 files changed, 1470 insertions(+), 753 deletions(-) create mode 100644 include/ircd/db/cell.h delete mode 100644 include/ircd/db/const_iterator.h diff --git a/include/ircd/db.h b/include/ircd/db.h index 3266f4c9a..bbf9879ff 100644 --- a/include/ircd/db.h +++ b/include/ircd/db.h @@ -30,6 +30,11 @@ namespace ircd { namespace db { +struct cell; +struct row; +struct column; +struct database; + // Errors for the database subsystem. The exceptions that use _HIDENAME // are built from RocksDB errors which already have an info string with // an included name. @@ -55,11 +60,14 @@ extern struct log::log log; } // namespace db } // namespace ircd +// // These are forward declarations to objects we may carry a pointer to. // Users of ircd::db should not be dealing with these types. +// namespace rocksdb { struct DB; + struct Slice; struct Options; struct DBOptions; struct ColumnFamilyOptions; @@ -71,22 +79,34 @@ namespace rocksdb struct Snapshot; } -#include "db/opts.h" -#include "db/delta.h" -#include "db/database.h" -#include "db/column.h" -#include "db/const_iterator.h" -#include "db/row.h" - +// +// Misc utils +// namespace ircd { namespace db { +rocksdb::Slice slice(const string_view &); +string_view slice(const rocksdb::Slice &); + +bool valid(const rocksdb::Iterator &); +string_view key(const rocksdb::Iterator &); +string_view val(const rocksdb::Iterator &); + std::string path(const std::string &name); std::vector available(); +void log_rdb_perf_context(const bool &all = true); + } // namespace db } // namespace ircd +#include "db/delta.h" +#include "db/database.h" +#include "db/opts.h" +#include "db/column.h" +#include "db/cell.h" +#include "db/row.h" + namespace ircd { namespace db { @@ -94,3 +114,9 @@ std::string merge_operator(const string_view &, const std::pair + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice is present in all copies. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#pragma once +#define HAVE_IRCD_DB_CELL_H + +namespace ircd { +namespace db { + +struct cell +{ + struct delta; + + column c; + string_view index; + database::snapshot ss; + std::unique_ptr it; + + public: + operator const rocksdb::Iterator &() const { return *it; } + operator const database::snapshot &() const { return ss; } + explicit operator const column &() const { return c; } + operator rocksdb::Iterator &() { return *it; } + operator database::snapshot &() { return ss; } + explicit operator column &() { return c; } + + bool valid() const; // cell exists + operator bool() const { return valid(); } + bool operator!() const { return !valid(); } + + // [GET] read from cell (zero-copy) + string_view col() const { return name(c); /* always column name */ } + string_view key() const; // key == index or empty on invalid + string_view val() const; // empty on !valid() + string_view key(); // reload then key == index or empty on invalid + string_view val(); // reload then empty on !valid() + + // [GET] read from cell (zero-copy) + explicit operator string_view() const; // empty on !valid() + explicit operator string_view(); // reload then empty on !valid() + + // [SET] assign cell + cell &operator=(const string_view &); + + // [GET] -> [SET] assign cell (atomic) + bool compare_exchange(string_view &expected, const string_view &desired); + string_view exchange(const string_view &desired); + + bool load(gopts = {}); + + cell(column, const string_view &index, std::unique_ptr); + cell(column, const string_view &index, gopts = {}); + cell(database &, const string_view &column, const string_view &index, gopts = {}); + cell(); + cell(cell &&) noexcept; + cell(const cell &) = delete; + cell &operator=(cell &&) noexcept; + cell &operator=(const cell &) = delete; + ~cell() noexcept; + + friend std::ostream &operator<<(std::ostream &s, const cell &c); +}; + +struct cell::delta +:std::tuple +{ + delta(const enum op &op, cell &c, const string_view &val = {}) + :std::tuple{op, c, val} + {} + + delta(cell &c, const string_view &val, const enum op &op = op::SET) + :std::tuple{op, c, val} + {} +}; + +// [SET] Perform operations in a sequence as a single transaction. +void write(const cell::delta &, const sopts & = {}); +void write(const std::initializer_list &, const sopts & = {}); +void write(const sopts &, const std::initializer_list &); + +const std::string &name(const cell &); +uint64_t sequence(const cell &); + +} // namespace db +} // namespace ircd + +inline +uint64_t +ircd::db::sequence(const cell &c) +{ + const database::snapshot &ss(c); + return sequence(ss); +} + +inline +const std::string & +ircd::db::name(const cell &c) +{ + return name(c.c); +} + +inline std::ostream & +ircd::db::operator<<(std::ostream &s, const cell &c) +{ + s << string_view{c}; + return s; +} diff --git a/include/ircd/db/column.h b/include/ircd/db/column.h index 7fa7b4ea0..aade76706 100644 --- a/include/ircd/db/column.h +++ b/include/ircd/db/column.h @@ -37,6 +37,7 @@ namespace db { // struct column { + struct delta; struct const_iterator; using key_type = string_view; using mapped_type = string_view; @@ -47,16 +48,14 @@ struct column using iterator = const_iterator; protected: - using ColumnFamilyHandle = rocksdb::ColumnFamilyHandle; - - database::column *c; + std::shared_ptr c; public: - operator const database &() const { return database::get(*c); } - operator const database::column &() const { return *c; } + explicit operator const database &() const; + explicit operator const database::column &() const; - operator database &() { return database::get(*c); } - operator database::column &() { return *c; } + explicit operator database &(); + explicit operator database::column &(); operator bool() const { return bool(c); } bool operator!() const { return !c; } @@ -70,30 +69,90 @@ struct column const_iterator lower_bound(const string_view &key, const gopts & = {}); const_iterator upper_bound(const string_view &key, const gopts & = {}); + // [GET] Get cell + cell operator[](const string_view &key) const; + // [GET] Perform a get into a closure. This offers a reference to the data with zero-copy. + using view_closure = std::function; void operator()(const string_view &key, const view_closure &func, const gopts & = {}); void operator()(const string_view &key, const gopts &, const view_closure &func); // [SET] Perform operations in a sequence as a single transaction. void operator()(const delta &, const sopts & = {}); void operator()(const std::initializer_list &, const sopts & = {}); + void operator()(const sopts &, const std::initializer_list &); void operator()(const op &, const string_view &key, const string_view &val = {}, const sopts & = {}); + explicit column(std::shared_ptr c); + column(database::column &c); column(database &, const string_view &column); - column(database::column &c) - :c{&c} - {} - column() = default; }; -// Get property data of a db column (column). +struct column::delta +:std::tuple +{ + delta(const enum op &op, const string_view &key, const string_view &val = {}) + :std::tuple{op, key, val} + {} + + delta(const string_view &key, const string_view &val, const enum op &op = op::SET) + :std::tuple{op, key, val} + {} +}; + +struct column::const_iterator +{ + using value_type = column::value_type; + using iterator_category = std::bidirectional_iterator_tag; + + private: + gopts opts; + std::shared_ptr c; + std::unique_ptr it; + mutable value_type val; + + friend class column; + const_iterator(std::shared_ptr, std::unique_ptr &&, gopts = {}); + + public: + operator const database::column &() const { return *c; } + operator const database::snapshot &() const { return opts.snapshot; } + explicit operator const gopts &() const { return opts; } + + operator database::column &() { return *c; } + explicit operator database::snapshot &() { return opts.snapshot; } + + operator bool() const; + bool operator!() const; + + const value_type *operator->() const; + const value_type &operator*() const; + + const_iterator &operator++(); + const_iterator &operator--(); + + const_iterator(); + const_iterator(const_iterator &&) noexcept; + const_iterator &operator=(const_iterator &&) noexcept; + ~const_iterator() noexcept; + + friend bool operator==(const const_iterator &, const const_iterator &); + friend bool operator!=(const const_iterator &, const const_iterator &); + friend bool operator<(const const_iterator &, const const_iterator &); + friend bool operator>(const const_iterator &, const const_iterator &); + + template friend void seek(column::const_iterator &, const pos &); + friend void seek(column::const_iterator &, const string_view &key); +}; + +// Get property data of a db column. // R can optionally be uint64_t for some values. template R property(column &, const string_view &name); template<> std::string property(column &, const string_view &name); template<> uint64_t property(column &, const string_view &name); -// Information about a column (column) +// Information about a column const std::string &name(const column &); size_t file_count(column &); size_t bytes(column &); @@ -121,18 +180,27 @@ void flush(column &, const bool &blocking = false); } // namespace ircd inline -ircd::db::column::column(database &d, - const string_view &column_name) -try -:column +ircd::db::column::operator database::column &() { - *d.columns.at(column_name) + return *c; } + +inline +ircd::db::column::operator database &() { + return database::get(*c); } -catch(const std::out_of_range &e) + +inline +ircd::db::column::operator const database::column &() +const { - log.error("'%s' failed to open non-existent column '%s'", - d.name, - column_name); + return *c; +} + +inline +ircd::db::column::operator const database &() +const +{ + return database::get(*c); } diff --git a/include/ircd/db/const_iterator.h b/include/ircd/db/const_iterator.h deleted file mode 100644 index d1d3e0ade..000000000 --- a/include/ircd/db/const_iterator.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (C) 2016 Charybdis Development Team - * Copyright (C) 2016 Jason Volk - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice is present in all copies. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING - * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - */ - -#pragma once -#define HAVE_IRCD_DB_CONST_ITERATOR_H - -namespace ircd { -namespace db { - -struct column::const_iterator -{ - struct state; - - using key_type = string_view; - using mapped_type = string_view; - using value_type = std::pair; - using pointer = value_type *; - using reference = value_type &; - using difference_type = size_t; - using iterator_category = std::bidirectional_iterator_tag; - - private: - gopts opts; - database::column *c; - std::unique_ptr it; - mutable value_type val; - - friend class column; - const_iterator(database::column &, std::unique_ptr &&, gopts = {}); - - public: - operator const database::column &() const { return *c; } - operator const database::snapshot &() const { return opts.snapshot; } - explicit operator const gopts &() const { return opts; } - - operator database::column &() { return *c; } - explicit operator database::snapshot &() { return opts.snapshot; } - - operator bool() const; - bool operator!() const; - bool operator<(const const_iterator &) const; - bool operator>(const const_iterator &) const; - bool operator==(const const_iterator &) const; - bool operator!=(const const_iterator &) const; - bool operator<=(const const_iterator &) const; - bool operator>=(const const_iterator &) const; - - const value_type *operator->() const; - const value_type &operator*() const; - - const_iterator &operator++(); - const_iterator &operator--(); - - const_iterator(); - const_iterator(const_iterator &&) noexcept; - const_iterator &operator=(const_iterator &&) noexcept; - ~const_iterator() noexcept; - - template friend void seek(column::const_iterator &, const pos &); - friend void seek(column::const_iterator &, const string_view &key); -}; - -} // namespace db -} // namespace ircd diff --git a/include/ircd/db/database.h b/include/ircd/db/database.h index 94839c2e9..3bcaaebdc 100644 --- a/include/ircd/db/database.h +++ b/include/ircd/db/database.h @@ -38,16 +38,6 @@ struct database struct comparator; struct column; - struct descriptor - { - using typing = std::pair; - - std::string name; - typing type { typeid(string_view), typeid(string_view) }; - std::string options {}; - db::comparator cmp {}; - }; - static std::map dbs; // open databases std::string name; @@ -61,6 +51,20 @@ struct database custom_ptr d; public: + struct descriptor + { + using typing = std::pair; + + std::string name; + std::string explain; + typing type { typeid(string_view), typeid(string_view) }; + std::string options {}; + db::comparator cmp {}; + }; + + using description = std::initializer_list; + + operator std::shared_ptr() { return shared_from_this(); } operator const rocksdb::DB &() const { return *d; } operator rocksdb::DB &() { return *d; } @@ -69,7 +73,7 @@ struct database database(const std::string &name, const std::string &options = {}, - std::initializer_list = {}); + description = {}); database() = default; database(database &&) = delete; @@ -143,21 +147,24 @@ struct database::snapshot ~snapshot() noexcept; }; +// Linkage to get shared_ptr of database::column +std::shared_ptr shared_from(const database::column &); +std::shared_ptr shared_from(database::column &); + // Get property data from all columns in DB. Only integer properties supported. template R property(database &, const string_view &name); template<> uint64_t property(database &, const string_view &name); +const std::string &name(const database &); const std::string &name(const database::column &); -uint32_t id(const database::column &); -void drop(database::column &); // Request to erase column from db uint64_t sequence(const database::snapshot &); // Sequence of a snapshot uint64_t sequence(const database &); // Latest sequence number +uint32_t id(const database::column &); +void drop(database::column &); // Request to erase column from db + void sync(database &); // Sync the write log (all columns) } // namespace db - -using database = db::database; - } // namespace ircd diff --git a/include/ircd/db/delta.h b/include/ircd/db/delta.h index c52bda756..51873ddc6 100644 --- a/include/ircd/db/delta.h +++ b/include/ircd/db/delta.h @@ -36,22 +36,9 @@ enum op SINGLE_DELETE, }; -struct delta -:std::tuple -{ - delta(const enum op &op, const string_view &key, const string_view &val = {}) - :std::tuple{op, key, val} - {} - - delta(const string_view &key, const string_view &val, const enum op &op = op::SET) - :std::tuple{op, key, val} - {} -}; - using merge_delta = std::pair; using merge_closure = std::function; using update_closure = std::function; -using view_closure = std::function; struct comparator { diff --git a/include/ircd/db/object.h b/include/ircd/db/object.h index 1a407005a..3d4f102c0 100644 --- a/include/ircd/db/object.h +++ b/include/ircd/db/object.h @@ -23,22 +23,24 @@ #pragma once #define HAVE_IRCD_DB_OBJECT_H +// handler register(username): +// +// (let database value registered = 0) +// +// context A | context B +// | +//0 enter | +//1 if(registered) | enter ; A yields on cache-miss/IO read +//2 | if(registered) ; B resumes hitting cached value A fetched +//3 | bnt return; ; B continues without yield +//4 | registered = time(nullptr); ; B assigns B's value to database +//5 b?t return; | leave ; A resumes [what does if() see?] +//6 registered = time(nullptr); | ; A overwrites B's value +//7 leave | ; ??? + namespace ircd { namespace db { -template -struct transaction -{ - string_view index; - database::snapshot snapshot; - //something transaction; - - transaction(const string_view &index = {}) - :index{index} - ,snapshot{*d} - {} -}; - template struct value @@ -47,186 +49,99 @@ struct value template struct value +:cell { - mutable column h; - transaction *t; - value(const string_view &name, - transaction &t) - :h{!name.empty()? column{*d, name} : column{}} - ,t{&t} + const string_view &index) + :cell{*d, name, index} {} - value() - :t{nullptr} - {} + using cell::cell; }; -template +template struct object +:row { - struct iterator; + struct const_iterator; - using key_type = string_view; - using mapped_type = value; - using value_type = std::pair; - using pointer = value_type *; - using reference = value_type &; - using size_type = size_t; - using difference_type = ptrdiff_t; + string_view prefix; + string_view index; - transaction *t; - - iterator begin(); - iterator end(); - - object(transaction &t) - :t{&t} - {} - - object() - :t{nullptr} - {} + object(const string_view &prefix, const string_view &index); + object() = default; }; - -template -struct object::iterator +template +object::object(const string_view &prefix, + const string_view &index) +:row{[&prefix, &index]() -> row { - using key_type = string_view; - using mapped_type = value; - using value_type = std::pair; - using pointer = value_type *; - using reference = value_type &; - using size_type = size_t; - using difference_type = ptrdiff_t; + // The prefix is the name of the object we want to find members in. + // This function has to find columns starting with the prefix but not + // containing any additional '.' except the one directly after the prefix, + // as more '.' indicates sub-members which we don't fetch here. - friend class object; - - protected: - transaction *t; - decltype(database::columns)::iterator it; - value_type last; - value_type val; - - void seek_next(); - - public: - const value_type *operator->() const { return &val; } - const value_type &operator*() const { return *operator->(); } - - bool operator==(const iterator &o) const { return it == o.it; } - bool operator!=(const iterator &o) const { return it != o.it; } - bool operator<(const iterator &o) const { return it < o.it; } - - iterator &operator++() + auto &columns(d->columns); + auto low(columns.lower_bound(prefix)), hi(low); + for(; hi != std::end(columns); ++hi) { - ++it; - seek_next(); - return *this; + const auto &name(hi->first); + if(!startswith(name, prefix)) + break; } - iterator(transaction &t) - :t{&t} - {} - - iterator() - :t{nullptr} - {} -}; - -template -typename object::iterator -object::end() -{ - iterator ret{}; - ret.it = std::end(d->columns); - return ret; -} - -template -typename object::iterator -object::begin() -{ - iterator ret{*t}; - ret.it = std::begin(d->columns); - ret.seek_next(); - return ret; -} - -template -void -object::iterator::seek_next() -{ - const auto ptc(tokens_count(prefix, ".")); - while(it != std::end(d->columns)) + string_view names[std::distance(low, hi)]; + std::transform(low, hi, names, [&prefix](const auto &pair) { - const auto &pair(*it); - if(!startswith(pair.first, prefix)) - { - ++it; - continue; - } + const auto &path(pair.first); - const auto ktc(tokens_count(pair.first, ".")); - if(ktc != ptc + 1) - { - const auto com(std::min(tokens_count(last.first, "."), ptc + 1)); - if(!com || token(last.first, ".", com - 1) == token(pair.first, ".", com - 1)) - { - ++it; - continue; - } - } + // Find members of this object by removing the prefix and then removing + // any members which have a '.' indicating they are not at this level. + string_view name(path); + name = lstrip(name, prefix); + name = lstrip(name, '.'); + if(tokens_count(name, ".") != 1) + return string_view{}; - bool bad(false); - const auto com(std::min(ktc, ptc)); - if(com) - for(size_t i(0); i < com - 1 && !bad; i++) - if(token(prefix, ".", i) != token(pair.first, ".", i)) - bad = true; - if(bad) - { - ++it; - continue; - } + return string_view{path}; + }); - val.first = pair.first; - last.first = pair.first; - val.first = lstrip(val.first, prefix); - val.first = lstrip(val.first, '.'); - val.first = split(val.first, '.').first; - val.second = value{pair.first, *t}; - break; - } + // Clear empty names from the array before passing up to row{} + const auto end(std::remove(names, names + std::distance(low, hi), string_view{})); + const auto count(std::distance(names, end)); + return row + { + *d, index, vector_view(names, count) + }; +}()} +,index{index} +{ } -/* template struct value :value { - // hold iterator - operator string_view() const { - std::cout << "read [" << this->name << "] " << std::endl; - return {}; + return string_view{static_cast(*this)}; + } + + operator string_view() + { + return string_view{static_cast(*this)}; } value &operator=(const string_view &val) { - std::cout << "write [" << this->name << "] " << val << std::endl; + static_cast(*this) = val; return *this; } - value(const string_view &name) - :value{name} + value(const string_view &col, + const string_view &row) + :value{col, row} {} friend std::ostream &operator<<(std::ostream &s, const value &v) @@ -235,37 +150,76 @@ struct value return s; } }; -*/ -template -struct value +template +struct arithmetic_value :value { - int64_t def; + bool compare_exchange(T &expected, const T &desired) + { + const auto ep(reinterpret_cast(&expected)); + const auto dp(reinterpret_cast(&desired)); - operator int64_t() const try - { - const auto val(read(this->h, this->t->index)); - return lex_cast(val); - } - catch(const not_found &e) - { - return def; + string_view s{ep, expected? sizeof(T) : 0}; + const auto ret(cell::compare_exchange(s, string_view{dp, sizeof(T)})); + expected = !s.empty()? *reinterpret_cast(s.data()) : 0; + return ret; } - value &operator=(const int64_t &val) + T exchange(const T &desired) { - write(this->h, this->t->index, lex_cast(val)); + const auto dp(reinterpret_cast(&desired)); + const auto ret(cell::exchange(string_view{dp, desired? sizeof(T) : 0})); + return !ret.empty()? *reinterpret_cast(ret.data()) : 0; + } + + operator T() const + { + const auto val(this->val()); + return !val.empty()? *reinterpret_cast(val.data()) : 0; + } + + operator T() + { + const auto val(this->val()); + return !val.empty()? *reinterpret_cast(val.data()) : 0; + } + + arithmetic_value &operator=(const T &val) + { + cell &cell(*this); + const auto ptr(reinterpret_cast(&val)); + cell = string_view{ptr, val? sizeof(T) : 0}; return *this; } - value(const string_view &name, - transaction &t, - const int64_t &def = 0) - :value{name, t} - ,def{def} + friend std::ostream &operator<<(std::ostream &s, const arithmetic_value &v) + { + s << T(v); + return s; + } + + arithmetic_value(const string_view &col, + const string_view &row) + :value{col, row} {} }; +#define IRCD_ARITHMETIC_VALUE(_type_) \ +template \ +struct value<_type_, d> \ +:arithmetic_value<_type_, d> \ +{ \ + using arithmetic_value<_type_, d>::arithmetic_value; \ +} + +IRCD_ARITHMETIC_VALUE(uint64_t); +IRCD_ARITHMETIC_VALUE(int64_t); +IRCD_ARITHMETIC_VALUE(uint32_t); +IRCD_ARITHMETIC_VALUE(int32_t); +IRCD_ARITHMETIC_VALUE(uint16_t); +IRCD_ARITHMETIC_VALUE(int16_t); + } // namespace db } // namespace ircd diff --git a/include/ircd/db/opts.h b/include/ircd/db/opts.h index 1f924fa64..2e9be8b15 100644 --- a/include/ircd/db/opts.h +++ b/include/ircd/db/opts.h @@ -58,6 +58,7 @@ enum class get NO_SNAPSHOT, // This iterator will have the latest data (tailing) NO_CHECKSUM, // Integrity of data will be checked unless this is specified READAHEAD, // Pair with a size in bytes for prefetching additional data + NO_EMPTY, // Option for db::row to not include unassigned cells in the row }; struct gopts diff --git a/include/ircd/db/row.h b/include/ircd/db/row.h index 2b791fc34..439890c11 100644 --- a/include/ircd/db/row.h +++ b/include/ircd/db/row.h @@ -26,41 +26,185 @@ namespace ircd { namespace db { -struct cell -{ - explicit - cell(database &, - column, - const string_view &key, - gopts opts = {}); -}; - struct row { - using key_type = column; - using mapped_type = std::unique_ptr; - using value_type = std::pair; + struct delta; + struct iterator; + struct const_iterator; + using value_type = cell &; + using reference = cell &; + using pointer = cell *; + using difference_type = size_t; - gopts opts; - std::vector its; + private: + std::vector its; template friend void seek(row &, const pos &); friend void seek(row &, const string_view &key); public: - auto begin() const { return std::begin(its); } - auto end() const { return std::end(its); } - auto begin() { return std::begin(its); } - auto end() { return std::end(its); } + // [GET] Iterations + const_iterator begin() const; + const_iterator end() const; + iterator begin(); + iterator end(); - string_view operator[](const string_view &column); + // [GET] Get iterator to cell + const_iterator find(const string_view &column) const; + iterator find(const string_view &column); - row(database &, const string_view &key = {}, gopts = {}); - row() = default; - row(row &&) noexcept; - row &operator=(row &&) noexcept; - ~row() noexcept; + auto empty() const { return its.empty(); } + auto size() const { return its.size(); } + + // [GET] Get cell + const cell &operator[](const string_view &column) const; + cell &operator[](const string_view &column); + + // [SET] Perform operation + void operator()(const op &, const string_view &col, const string_view &val = {}, const sopts & = {}); + + row(std::vector cells = {}) + :its{std::move(cells)} + {} + + row(database &, + const string_view &key = {}, + const vector_view &columns = {}, + const gopts &opts = {}); + + friend size_t trim(row &, const std::function &); + friend size_t trim(row &, const string_view &key); // remove invalid or not equal + friend size_t trim(row &); // remove invalid +}; + +struct row::const_iterator +{ + using value_type = const cell &; + using reference = const cell &; + using pointer = const cell *; + using iterator_category = std::bidirectional_iterator_tag; + + private: + friend class row; + + decltype(row::its)::const_iterator it; + + const_iterator(decltype(row::its)::const_iterator it) + :it{std::move(it)} + {} + + public: + reference operator*() const { return it.operator*(); } + pointer operator->() const { return it.operator->(); } + + const_iterator &operator++() { ++it; return *this; } + const_iterator &operator--() { --it; return *this; } + + const_iterator() = default; + + friend bool operator==(const const_iterator &, const const_iterator &); + friend bool operator!=(const const_iterator &, const const_iterator &); +}; + +struct row::iterator +{ + using value_type = cell &; + using reference = cell &; + using pointer = cell *; + using iterator_category = std::bidirectional_iterator_tag; + + private: + friend class row; + + decltype(row::its)::iterator it; + + iterator(decltype(row::its)::iterator it) + :it{std::move(it)} + {} + + public: + reference operator*() const { return it.operator*(); } + pointer operator->() const { return it.operator->(); } + + iterator &operator++() { ++it; return *this; } + iterator &operator--() { --it; return *this; } + + iterator() = default; + + friend bool operator==(const iterator &, const iterator &); + friend bool operator!=(const iterator &, const iterator &); }; } // namespace db } // namespace ircd + +inline ircd::db::cell & +ircd::db::row::operator[](const string_view &column) +{ + const auto it(find(column)); + if(unlikely(it == end())) + throw not_found("column '%s' does not exist", column); + + return *it; +} + +inline const ircd::db::cell & +ircd::db::row::operator[](const string_view &column) +const +{ + const auto it(find(column)); + if(unlikely(it == end())) + throw not_found("column '%s' does not exist", column); + + return *it; +} + +inline ircd::db::row::iterator +ircd::db::row::end() +{ + return { std::end(its) }; +} + +inline ircd::db::row::iterator +ircd::db::row::begin() +{ + return { std::begin(its) }; +} + +inline ircd::db::row::const_iterator +ircd::db::row::end() +const +{ + return { std::end(its) }; +} + +inline ircd::db::row::const_iterator +ircd::db::row::begin() +const +{ + return { std::begin(its) }; +} + +inline bool +ircd::db::operator!=(const row::iterator &a, const row::iterator &b) +{ + return a.it != b.it; +} + +inline bool +ircd::db::operator==(const row::iterator &a, const row::iterator &b) +{ + return a.it == b.it; +} + +inline bool +ircd::db::operator!=(const row::const_iterator &a, const row::const_iterator &b) +{ + return a.it != b.it; +} + +inline bool +ircd::db::operator==(const row::const_iterator &a, const row::const_iterator &b) +{ + return a.it == b.it; +} diff --git a/ircd/db.cc b/ircd/db.cc index ab06bb042..cf2eb54f0 100644 --- a/ircd/db.cc +++ b/ircd/db.cc @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -46,6 +47,7 @@ struct throw_on_error const std::string &reflect(const rocksdb::Tickers &); const std::string &reflect(const rocksdb::Histograms &); rocksdb::Slice slice(const string_view &); +string_view slice(const rocksdb::Slice &); // Frequently used get options and set options are separate from the string/map system rocksdb::WriteOptions make_opts(const sopts &); @@ -73,18 +75,26 @@ void valid_or_throw(const rocksdb::Iterator &); bool valid_equal(const rocksdb::Iterator &, const string_view &); void valid_equal_or_throw(const rocksdb::Iterator &, const string_view &); -// re-seekers +// Direct re-seekers. Internal only. +void _seek_(rocksdb::Iterator &, const rocksdb::Slice &); +void _seek_(rocksdb::Iterator &, const string_view &); +void _seek_(rocksdb::Iterator &, const pos &); + +// Move an iterator +template void seek(database::column &, const pos &, rocksdb::ReadOptions &, std::unique_ptr &it); +template void seek(database::column &, const pos &, const gopts &, std::unique_ptr &it); template void seek(column::const_iterator &, const pos &); template void seek(row &, const pos &p); -// Initial seekers +// Query for an iterator. Returns a lower_bound on a key std::unique_ptr seek(column &, const gopts &); -std::unique_ptr seek(column &, const string_view &, const gopts &); +std::unique_ptr seek(column &, const string_view &key, const gopts &); std::vector seek(database &, const gopts &); std::pair operator*(const rocksdb::Iterator &); -void append(rocksdb::WriteBatch &, column &, const delta &delta); +void append(rocksdb::WriteBatch &, column &, const column::delta &delta); +void append(rocksdb::WriteBatch &, const cell::delta &delta); std::vector column_names(const std::string &path, const rocksdb::DBOptions &); std::vector column_names(const std::string &path, const std::string &options); @@ -188,7 +198,7 @@ struct database::column std::type_index key_type; std::type_index mapped_type; comparator cmp; - rocksdb::ColumnFamilyHandle *handle; + custom_ptr handle; public: operator const rocksdb::ColumnFamilyOptions &(); @@ -199,7 +209,12 @@ struct database::column operator rocksdb::ColumnFamilyHandle *(); operator database &(); - column(database *const &d, descriptor); + explicit column(database *const &d, descriptor); + column() = delete; + column(column &&) = delete; + column(const column &) = delete; + column &operator=(column &&) = delete; + column &operator=(const column &) = delete; ~column() noexcept; }; @@ -218,7 +233,10 @@ database::dbs void ircd::db::sync(database &d) { - throw_on_error(d.d->SyncWAL()); + throw_on_error + { + d.d->SyncWAL() + }; } uint64_t @@ -239,13 +257,25 @@ ircd::db::property(database &d, return ret; } +std::shared_ptr +ircd::db::shared_from(database::column &column) +{ + return column.shared_from_this(); +} + +std::shared_ptr +ircd::db::shared_from(const database::column &column) +{ + return column.shared_from_this(); +} + // // database // ircd::db::database::database(const std::string &name, const std::string &optstr, - std::initializer_list descriptor) + description description) try :name { @@ -274,23 +304,22 @@ try ,cache{[this]() -> std::shared_ptr { //TODO: XXX - /*{ - const auto ret(rocksdb::NewLRUCache(lru_cache_size)); - this->opts->row_cache = ret; - return ret; - }*/ - return {}; + const auto lru_cache_size{64_MiB}; + return rocksdb::NewLRUCache(lru_cache_size); }()} -,d{[this, &descriptor, &optstr]() -> custom_ptr +,d{[this, &description, &optstr]() -> custom_ptr { rocksdb::DBOptions opts { options(optstr) }; + // Setup sundry opts.error_if_exists = false; opts.create_if_missing = true; opts.create_missing_column_families = true; + opts.max_file_opening_threads = 0; + opts.use_fsync = true; // Setup logging logs->SetInfoLogLevel(ircd::debugmode? rocksdb::DEBUG_LEVEL : rocksdb::WARN_LEVEL); @@ -305,11 +334,15 @@ try //rocksdb::SetPerfLevel(rocksdb::PerfLevel::kDisable); // Setup journal recovery options - opts.wal_recovery_mode = rocksdb::WALRecoveryMode::kAbsoluteConsistency; - //opts.wal_recovery_mode = rocksdb::WALRecoveryMode::kPointInTimeRecovery; + //opts.wal_recovery_mode = rocksdb::WALRecoveryMode::kTolerateCorruptedTailRecords; + //opts.wal_recovery_mode = rocksdb::WALRecoveryMode::kAbsoluteConsistency; + opts.wal_recovery_mode = rocksdb::WALRecoveryMode::kPointInTimeRecovery; + + // Setup cache + opts.row_cache = this->cache; // Setup column families - for(auto &desc : descriptor) + for(auto &desc : description) { const auto c(std::make_shared(this, std::move(desc))); columns.emplace(c->name, c); @@ -327,14 +360,17 @@ try throw error("Failed to describe existing column '%s'", name); // Setup the database closer. - // We need customization here because of the column family thing. const auto deleter([this](rocksdb::DB *const d) noexcept { - throw_on_error(d->SyncWAL()); // blocking + throw_on_error + { + d->SyncWAL() // blocking + }; + columns.clear(); - rocksdb::CancelAllBackgroundWork(d, true); // true = blocking - throw_on_error(d->PauseBackgroundWork()); + //rocksdb::CancelAllBackgroundWork(d, true); // true = blocking + //throw_on_error(d->PauseBackgroundWork()); const auto seq(d->GetLatestSequenceNumber()); delete d; @@ -344,12 +380,6 @@ try seq); }); - // Announce attempt before usual point where exceptions are thrown - log.debug("Opening database \"%s\" @ `%s' columns[%zu]", - this->name, - path, - columns.size()); - // Open DB into ptr rocksdb::DB *ptr; std::vector handles; @@ -359,16 +389,31 @@ try { return static_cast(*pair.second); }); - +/* + if(fs::is_dir(path)) + { + log.info("Checking database @ `%s' columns[%zu]", path, columns.size()); + throw_on_error(rocksdb::RepairDB(path, opts, columns)); + log.info("Database @ `%s' check complete", path, columns.size()); + } //if(has_opt(opts, opt::READ_ONLY)) // throw_on_error(rocksdb::DB::OpenForReadOnly(*this->opts, path, columns, &handles, &ptr)); //else - throw_on_error(rocksdb::DB::Open(opts, path, columns, &handles, &ptr)); +*/ + // Announce attempt before usual point where exceptions are thrown + log.debug("Opening database \"%s\" @ `%s' columns[%zu]", + this->name, + path, + columns.size()); + + throw_on_error + { + rocksdb::DB::Open(opts, path, columns, &handles, &ptr) + }; for(const auto &handle : handles) - this->columns.at(handle->GetName())->handle = handle; + this->columns.at(handle->GetName())->handle.reset(handle); - // re-establish RAII here return { ptr, deleter }; }()} { @@ -395,16 +440,25 @@ noexcept ircd::db::database::column & ircd::db::database::operator[](const string_view &name) +try { return *columns.at(name); } +catch(const std::out_of_range &e) +{ + throw error("'%s': column '%s' is not available or specified in schema", this->name, name); +} const ircd::db::database::column & ircd::db::database::operator[](const string_view &name) -const +const try { return *columns.at(name); } +catch(const std::out_of_range &e) +{ + throw error("'%s': column '%s' is not available or specified in schema", this->name, name); +} ircd::db::database & ircd::db::database::get(column &column) @@ -536,8 +590,17 @@ ircd::db::database::column::column(database *const &d, ,key_type{desc.type.first} ,mapped_type{desc.type.second} ,cmp{d, std::move(desc.cmp)} -,handle{nullptr} +,handle { + nullptr, [this](rocksdb::ColumnFamilyHandle *const handle) + { + if(handle) + this->d->d->DestroyColumnFamilyHandle(handle); + } +} +{ + assert(d->columns.count(this->name) == 0); + if(!this->cmp.user.less) { if(key_type == typeid(string_view)) @@ -562,8 +625,6 @@ ircd::db::database::column::column(database *const &d, ircd::db::database::column::~column() noexcept { - if(handle) - d->d->DestroyColumnFamilyHandle(handle); } ircd::db::database::column::operator @@ -575,7 +636,7 @@ database &() ircd::db::database::column::operator rocksdb::ColumnFamilyHandle *() { - return handle; + return handle.get(); } ircd::db::database::column::operator @@ -589,7 +650,7 @@ ircd::db::database::column::operator const rocksdb::ColumnFamilyHandle *() const { - return handle; + return handle.get(); } void @@ -598,7 +659,10 @@ ircd::db::drop(database::column &c) if(!c.handle) return; - throw_on_error(c.d->d->DropColumnFamily(c.handle)); + throw_on_error + { + c.d->d->DropColumnFamily(c.handle.get()) + }; } uint32_t @@ -616,6 +680,12 @@ ircd::db::name(const database::column &c) return c.name; } +const std::string & +ircd::db::name(const database &d) +{ + return d.name; +} + /////////////////////////////////////////////////////////////////////////////// // // database::snapshot @@ -650,6 +720,11 @@ noexcept { } +/////////////////////////////////////////////////////////////////////////////// +// +// database::logs +// + static ircd::log::facility translate(const rocksdb::InfoLogLevel &level) @@ -705,6 +780,11 @@ ircd::db::database::logs::Logv(const rocksdb::InfoLogLevel level, log(translate(level), "'%s': (rdb) %s", d->name, str); } +/////////////////////////////////////////////////////////////////////////////// +// +// database::mergeop +// + const char * ircd::db::database::mergeop::Name() const @@ -756,6 +836,18 @@ catch(const std::exception &e) return false; } +/////////////////////////////////////////////////////////////////////////////// +// +// database::stats +// + +void +ircd::db::log_rdb_perf_context(const bool &all) +{ + const bool exclude_zeros(!all); + log.debug("%s", rocksdb::perf_context.ToString(exclude_zeros)); +} + bool ircd::db::database::stats::HistEnabledForType(const uint32_t type) const @@ -884,6 +976,250 @@ ircd::db::database::events::OnColumnFamilyHandleDeletionStarted(rocksdb::ColumnF h); } +/////////////////////////////////////////////////////////////////////////////// +// +// db/cell.h +// + +void +ircd::db::write(const sopts &sopts, + const std::initializer_list &deltas) +{ + write(deltas, sopts); +} + +void +ircd::db::write(const std::initializer_list &deltas, + const sopts &sopts) +{ + if(!deltas.size()) + return; + + auto &front(*std::begin(deltas)); + column &c(std::get(front).c); + database &d(c); + + rocksdb::WriteBatch batch; + for(const auto &delta : deltas) + append(batch, delta); + + auto opts(make_opts(sopts)); + throw_on_error + { + d.d->Write(opts, &batch) + }; +} + +void +ircd::db::write(const cell::delta &delta, + const sopts &sopts) +{ + column &c(std::get(delta).c); + database &d(c); + + rocksdb::WriteBatch batch; + append(batch, delta); + auto opts(make_opts(sopts)); + throw_on_error + { + d.d->Write(opts, &batch) + }; +} + +void +ircd::db::append(rocksdb::WriteBatch &batch, + const cell::delta &delta) +{ + auto &column(std::get(delta).c); + append(batch, column, column::delta + { + std::get(delta), + std::get(delta).index, + std::get(delta) + }); +} + +// Linkage for incomplete rocksdb::Iterator +ircd::db::cell::cell() +{ +} + +ircd::db::cell::cell(database &d, + const string_view &colname, + const string_view &index, + gopts opts) +:cell +{ + column(d[colname]), index, std::move(opts) +} +{ +} + +ircd::db::cell::cell(column column, + const string_view &index, + gopts opts) +:c{std::move(column)} +,index{index} +,ss{opts.snapshot} +,it{ss? seek(this->c, this->index, opts) : std::unique_ptr{}} +{ +} + +ircd::db::cell::cell(column column, + const string_view &index, + std::unique_ptr it) +:c{std::move(column)} +,index{index} +,it{std::move(it)} +{ +} + +// Linkage for incomplete rocksdb::Iterator +ircd::db::cell::cell(cell &&o) +noexcept +:c{std::move(o.c)} +,index{std::move(o.index)} +,ss{std::move(o.ss)} +,it{std::move(o.it)} +{ +} + +// Linkage for incomplete rocksdb::Iterator +ircd::db::cell & +ircd::db::cell::operator=(cell &&o) +noexcept +{ + c = std::move(o.c); + index = std::move(o.index); + ss = std::move(o.ss); + it = std::move(o.it); + + return *this; +} + +// Linkage for incomplete rocksdb::Iterator +ircd::db::cell::~cell() +noexcept +{ +} + +bool +ircd::db::cell::load(gopts opts) +{ + database &d(c); + if(valid() && !opts.snapshot && sequence(ss) == sequence(d)) + return true; + + if(bool(opts.snapshot)) + { + this->it.reset(); + this->ss = std::move(opts.snapshot); + } + + std::unique_ptr tit; + throw_on_error(d.d->GetUpdatesSince(0, &tit)); + while(tit && tit->Valid()) + { + auto batchres(tit->GetBatch()); + std::cout << "seq: " << batchres.sequence; + if(batchres.writeBatchPtr) + { + auto &batch(*batchres.writeBatchPtr); + std::cout << " count " << batch.Count() << " ds: " << batch.GetDataSize() + << " " << batch.Data() << std::endl; + } + + tit->Next(); + } + + + database::column &c(this->c); + seek(c, index, opts, it); + return valid(); +} + +ircd::string_view +ircd::db::cell::exchange(const string_view &desired) +{ + const auto ret(val()); + (*this) = desired; + return ret; +} + +bool +ircd::db::cell::compare_exchange(string_view &expected, + const string_view &desired) +{ + const auto existing(val()); + if(expected.size() != existing.size() || + memcmp(expected.data(), existing.data(), expected.size()) != 0) + { + expected = existing; + return false; + } + + expected = existing; + (*this) = desired; + return true; +} + +ircd::db::cell & +ircd::db::cell::operator=(const string_view &s) +{ + write(c, index, s); + return *this; +} + +ircd::db::cell::operator string_view() +{ + return val(); +} + +ircd::db::cell::operator string_view() +const +{ + return val(); +} + +ircd::string_view +ircd::db::cell::val() +{ + if(!valid()) + load(); + + return likely(valid())? db::val(*it) : string_view{}; +} + +ircd::string_view +ircd::db::cell::key() +{ + if(!valid()) + load(); + + return likely(valid())? db::key(*it) : index; +} + +ircd::string_view +ircd::db::cell::val() +const +{ + return likely(valid())? db::val(*it) : string_view{}; +} + +ircd::string_view +ircd::db::cell::key() +const +{ + return likely(valid())? db::key(*it) : index; +} + +bool +ircd::db::cell::valid() +const +{ + return it && valid_equal(*it, index); +} + /////////////////////////////////////////////////////////////////////////////// // // db/row.h @@ -891,18 +1227,61 @@ ircd::db::database::events::OnColumnFamilyHandleDeletionStarted(rocksdb::ColumnF ircd::db::row::row(database &d, const string_view &key, - gopts opts) -:opts{std::move(opts)} -,its{[this, &d] + const vector_view &colnames, + const gopts &opts) +:its{[this, &d, &key, &colnames, &opts] { - return seek(d, this->opts); + using std::end; + using std::begin; + using rocksdb::Iterator; + using rocksdb::ColumnFamilyHandle; + + const rocksdb::ReadOptions options + { + make_opts(opts) + }; + + std::vector colptr + { + colnames.empty()? d.columns.size() : colnames.size() + }; + + if(colnames.empty()) + std::transform(begin(d.columns), end(d.columns), begin(colptr), [&colnames] + (const auto &p) + { + return p.second.get(); + }); + else + std::transform(begin(colnames), end(colnames), begin(colptr), [&d] + (const auto &name) + { + return &d[name]; + }); + + std::vector handles(colptr.size()); + std::transform(begin(colptr), end(colptr), begin(handles), [] + (database::column *const &ptr) + { + return ptr->handle.get(); + }); + + std::vector iterators; + throw_on_error + { + d.d->NewIterators(options, handles, &iterators) + }; + + std::vector ret(iterators.size()); + for(size_t i(0); i < ret.size(); ++i) + { + std::unique_ptr it(iterators.at(i)); + ret[i] = cell { *colptr.at(i), key, std::move(it) }; + } + + return ret; }()} { - // Piggyback on the snapshot's reference to database. - // This has to be set here if gopts.snapshot was default initialized. - if(!this->opts.snapshot) - this->opts.snapshot.d = weak_from(d); - if(key.empty()) { seek(*this, pos::FRONT); @@ -910,63 +1289,120 @@ ircd::db::row::row(database &d, } seek(*this, key); - const auto end + + // without the noempty flag, all cells for a row show up in the row + // i.e all the columns of the db, etc + const bool noempty { - std::remove_if(std::begin(its), std::end(its), [&key] - (auto &pair) + has_opt(opts, get::NO_EMPTY) + }; + + const auto trimmer([&key, &noempty] + (auto &cell) + { + if(noempty) + return cell.key() != key; + + // seek() returns a lower_bound so we have to compare equality + // here to not give the user data from the wrong row. The cell itself + // is not removed to allow the column to be visible in the row. + if(cell.key() != key) + cell.it.reset(); + + return false; + }); + + trim(*this, trimmer); +} + +void +ircd::db::row::operator()(const op &op, + const string_view &col, + const string_view &val, + const sopts &sopts) +{ + write(cell::delta{op, (*this)[col], val}, sopts); +} + +size_t +ircd::db::trim(row &r) +{ + return trim(r, [] + (const auto &cell) + { + return !valid(*cell.it); + }); +} + +size_t +ircd::db::trim(row &r, + const string_view &index) +{ + return trim(r, [&index] + (const auto &cell) + { + return !valid_equal(*cell.it, index); + }); +} + +size_t +ircd::db::trim(row &r, + const std::function &closure) +{ + const auto end(std::remove_if(std::begin(r.its), std::end(r.its), closure)); + const auto ret(std::distance(end, std::end(r.its))); + r.its.erase(end, std::end(r.its)); + r.its.shrink_to_fit(); + return ret; +} + +void +ircd::db::seek(row &r, + const string_view &s) +{ + seek(r, s); +} + +template +void +ircd::db::seek(row &r, + const pos &p) +{ + ctx::offload([&r, &p] + { + std::for_each(begin(r.its), end(r.its), [&p] + (auto &cell) { - rocksdb::Iterator &it{*pair.second}; - return !valid_equal(it, key); - }) - }; - its.erase(end, std::end(its)); + _seek_(cell, p); + }); + }); } -ircd::db::row::row(row &&o) -noexcept -:opts{std::move(o.opts)} -,its{std::move(o.its)} +ircd::db::row::iterator +ircd::db::row::find(const string_view &col) { -} - -ircd::db::row & -ircd::db::row::operator=(row &&o) -noexcept -{ - its = std::move(o.its); - opts = std::move(o.opts); - - return *this; -} - -ircd::db::row::~row() -noexcept -{ -} - -ircd::string_view -ircd::db::row::operator[](const string_view &colname) -{ - const auto it(std::find_if(begin(), end(), [&colname] - (const auto &pair) + iterator ret; + ret.it = std::find_if(std::begin(its), std::end(its), [&col] + (const auto &cell) { - auto &column(pair.first); - return name(column) == colname; - })); + return name(cell.c) == col; + }); - if(it == end()) - return {}; + return ret; +} - rocksdb::Iterator &rit +ircd::db::row::const_iterator +ircd::db::row::find(const string_view &col) +const +{ + const_iterator ret; + ret.it = std::find_if(std::begin(its), std::end(its), [&col] + (const auto &cell) { - *it->second - }; + return name(cell.c) == col; + }); - if(!rit) - return {}; - - const auto pair(*rit); - return pair.second; + return ret; } /////////////////////////////////////////////////////////////////////////////// @@ -1059,7 +1495,8 @@ ircd::db::bytes(column &column) rocksdb::ColumnFamilyMetaData cfm; database::column &c(column); database &d(c); - d.d->GetColumnFamilyMetaData(c.handle, &cfm); + assert(bool(c.handle)); + d.d->GetColumnFamilyMetaData(c.handle.get(), &cfm); return cfm.size; } @@ -1069,7 +1506,8 @@ ircd::db::file_count(column &column) rocksdb::ColumnFamilyMetaData cfm; database::column &c(column); database &d(c); - d.d->GetColumnFamilyMetaData(c.handle, &cfm); + assert(bool(c.handle)); + d.d->GetColumnFamilyMetaData(c.handle.get(), &cfm); return cfm.file_count; } @@ -1084,6 +1522,21 @@ ircd::db::name(const column &column) // column // +ircd::db::column::column(database::column &c) +:c{shared_from(c)} +{ +} + +ircd::db::column::column(std::shared_ptr c) +:c{std::move(c)} +{ +} + +ircd::db::column::column(database &d, + const string_view &column_name) +:c{shared_from(d[column_name])} +{} + void ircd::db::flush(column &column, const bool &blocking) @@ -1093,8 +1546,15 @@ ircd::db::flush(column &column, rocksdb::FlushOptions opts; opts.wait = blocking; + log.debug("'%s':'%s' @%lu FLUSH", + name(d), + name(c), + sequence(d)); - throw_on_error(d.d->Flush(opts, c)); + throw_on_error + { + d.d->Flush(opts, c) + }; } void @@ -1104,9 +1564,17 @@ ircd::db::del(column &column, { database &d(column); database::column &c(column); + log.debug("'%s':'%s' @%lu DELETE key(%zu B)", + name(d), + name(c), + sequence(d), + key.size()); auto opts(make_opts(sopts)); - throw_on_error(d.d->Delete(opts, c, slice(key))); + throw_on_error + { + d.d->Delete(opts, c, slice(key)) + }; } void @@ -1132,15 +1600,24 @@ ircd::db::write(column &column, { database &d(column); database::column &c(column); + log.debug("'%s':'%s' @%lu PUT key(%zu B) val(%zu B)", + name(d), + name(c), + sequence(d), + key.size(), + val.size()); auto opts(make_opts(sopts)); - throw_on_error(d.d->Put(opts, c, slice(key), slice(val))); + throw_on_error + { + d.d->Put(opts, c, slice(key), slice(val)) + }; } void ircd::db::append(rocksdb::WriteBatch &batch, column &column, - const delta &delta) + const column::delta &delta) { database::column &c(column); @@ -1187,6 +1664,14 @@ ircd::db::has(column &column, }); } + log.debug("'%s':'%s' @%lu HAS key(%zu B) %s [%s]", + name(d), + name(c), + sequence(d), + key.size(), + status.ok()? "YES"s : "NO"s, + opts.read_tier == BLOCKING? "CACHE MISS"s : "CACHE HIT"s); + // Finally the result switch(status.code()) { @@ -1209,6 +1694,13 @@ ircd::db::column::operator()(const op &op, operator()(delta{op, key, val}, sopts); } +void +ircd::db::column::operator()(const sopts &sopts, + const std::initializer_list &deltas) +{ + operator()(deltas, sopts); +} + void ircd::db::column::operator()(const std::initializer_list &deltas, const sopts &sopts) @@ -1219,7 +1711,10 @@ ircd::db::column::operator()(const std::initializer_list &deltas, database &d(*this); auto opts(make_opts(sopts)); - throw_on_error(d.d->Write(opts, &batch)); + throw_on_error + { + d.d->Write(opts, &batch) + }; } void @@ -1231,7 +1726,10 @@ ircd::db::column::operator()(const delta &delta, database &d(*this); auto opts(make_opts(sopts)); - throw_on_error(d.d->Write(opts, &batch)); + throw_on_error + { + d.d->Write(opts, &batch) + }; } void @@ -1249,11 +1747,17 @@ ircd::db::column::operator()(const string_view &key, { const auto it(seek(*this, key, gopts)); valid_equal_or_throw(*it, key); - - const auto &v(it->value()); - func(string_view{v.data(), v.size()}); + func(val(*it)); } +ircd::db::cell +ircd::db::column::operator[](const string_view &key) +const +{ + return { *this, key }; +} + + /////////////////////////////////////////////////////////////////////////////// // // column::const_iterator @@ -1286,7 +1790,6 @@ ircd::db::column::cend(const gopts &gopts) ircd::db::column::const_iterator ircd::db::column::cbegin(const gopts &gopts) { - database::column &c(*this); const_iterator ret { c, {}, gopts @@ -1322,7 +1825,6 @@ ircd::db::column::const_iterator ircd::db::column::lower_bound(const string_view &key, const gopts &gopts) { - database::column &c(*this); const_iterator ret { c, {}, gopts @@ -1332,11 +1834,6 @@ ircd::db::column::lower_bound(const string_view &key, return std::move(ret); } -ircd::db::column::const_iterator::const_iterator() -:c{nullptr} -{ -} - ircd::db::column::const_iterator::const_iterator(const_iterator &&o) noexcept :opts{std::move(o.opts)} @@ -1350,21 +1847,22 @@ ircd::db::column::const_iterator & ircd::db::column::const_iterator::operator=(const_iterator &&o) noexcept { - this->~const_iterator(); - opts = std::move(o.opts); c = std::move(o.c); it = std::move(o.it); val = std::move(o.val); - return *this; } -ircd::db::column::const_iterator::const_iterator(database::column &c, +ircd::db::column::const_iterator::const_iterator() +{ +} + +ircd::db::column::const_iterator::const_iterator(std::shared_ptr c, std::unique_ptr &&it, gopts opts) :opts{std::move(opts)} -,c{&c} +,c{std::move(c)} ,it{std::move(it)} { //if(!has_opt(this->opts, get::READAHEAD)) @@ -1394,12 +1892,9 @@ const ircd::db::column::const_iterator::value_type & ircd::db::column::const_iterator::operator*() const { - const auto &k(it->key()); - const auto &v(it->value()); - - val.first = { k.data(), k.size() }; - val.second = { v.data(), v.size() }; - + assert(valid(*it)); + val.first = db::key(*it); + val.second = db::val(*it); return val; } @@ -1410,86 +1905,6 @@ const return &operator*(); } -bool -ircd::db::column::const_iterator::operator>=(const const_iterator &o) -const -{ - return (*this > o) || (*this == o); -} - -bool -ircd::db::column::const_iterator::operator<=(const const_iterator &o) -const -{ - return (*this < o) || (*this == o); -} - -bool -ircd::db::column::const_iterator::operator!=(const const_iterator &o) -const -{ - return !(*this == o); -} - -bool -ircd::db::column::const_iterator::operator==(const const_iterator &o) -const -{ - if(*this && o) - { - const auto &a(it->key()); - const auto &b(o.it->key()); - return a.compare(b) == 0; - } - - if(!*this && !o) - return true; - - return false; -} - -bool -ircd::db::column::const_iterator::operator>(const const_iterator &o) -const -{ - if(*this && o) - { - const auto &a(it->key()); - const auto &b(o.it->key()); - return a.compare(b) == 1; - } - - if(!*this && o) - return true; - - if(!*this && !o) - return false; - - assert(!*this && o); - return false; -} - -bool -ircd::db::column::const_iterator::operator<(const const_iterator &o) -const -{ - if(*this && o) - { - const auto &a(it->key()); - const auto &b(o.it->key()); - return a.compare(b) == -1; - } - - if(!*this && o) - return false; - - if(!*this && !o) - return false; - - assert(*this && !o); - return true; -} - bool ircd::db::column::const_iterator::operator!() const @@ -1509,6 +1924,197 @@ const return !!*this; } +bool +ircd::db::operator!=(const column::const_iterator &a, const column::const_iterator &b) +{ + return !(a == b); +} + +bool +ircd::db::operator==(const column::const_iterator &a, const column::const_iterator &b) +{ + if(a && b) + { + const auto &ak(a.it->key()); + const auto &bk(b.it->key()); + return ak.compare(bk) == 0; + } + + if(!a && !b) + return true; + + return false; +} + +bool +ircd::db::operator>(const column::const_iterator &a, const column::const_iterator &b) +{ + if(a && b) + { + const auto &ak(a.it->key()); + const auto &bk(b.it->key()); + return ak.compare(bk) == 1; + } + + if(!a && b) + return true; + + if(!a && !b) + return false; + + assert(!a && b); + return false; +} + +bool +ircd::db::operator<(const column::const_iterator &a, const column::const_iterator &b) +{ + if(a && b) + { + const auto &ak(a.it->key()); + const auto &bk(b.it->key()); + return ak.compare(bk) == -1; + } + + if(!a && b) + return false; + + if(!a && !b) + return false; + + assert(a && !b); + return true; +} + +template +void +ircd::db::seek(column::const_iterator &it, + const pos &p) +{ + database::column &c(it); + database &d(*c.d); + const gopts &gopts(it); + auto opts + { + make_opts(gopts, true) + }; + + seek(c, p, opts, it.it); +} + +void +ircd::db::seek(column::const_iterator &it, + const string_view &s) +{ + seek(it, s); +} + +/////////////////////////////////////////////////////////////////////////////// +// +// seek +// + +std::unique_ptr +ircd::db::seek(column &column, + const string_view &key, + const gopts &opts) +{ + using rocksdb::Iterator; + + database &d(column); + database::column &c(column); + std::unique_ptr ret; + seek(c, key, opts, ret); + return std::move(ret); +} + +template +void +ircd::db::seek(database::column &c, + const pos &p, + const gopts &gopts, + std::unique_ptr &it) +{ + auto opts + { + make_opts(gopts) + }; + + seek(c, p, opts, it); +} + +template +void +ircd::db::seek(database::column &c, + const pos &p, + rocksdb::ReadOptions &opts, + std::unique_ptr &it) +{ + database &d(*c.d); + + // Start with a non-blocking query + if(!it || opts.read_tier == BLOCKING) + { + opts.read_tier = NON_BLOCKING; + it.reset(d.d->NewIterator(opts, c)); + } + + _seek_(*it, p); + if(it->status().IsIncomplete()) + { + // DB cache miss: reset the iterator to blocking mode and offload it + opts.read_tier = BLOCKING; + it.reset(d.d->NewIterator(opts, c)); + ctx::offload([&it, &p] + { + _seek_(*it, p); + }); + } + + log.debug("'%s':'%s' @%lu SEEK [valid: %d] [%s]", + name(d), + name(c), + sequence(d), + valid(*it), + opts.read_tier == BLOCKING? "CACHE MISS"s : "CACHE HIT"s); +} + +void +ircd::db::_seek_(rocksdb::Iterator &it, + const pos &p) +{ + switch(p) + { + case pos::NEXT: it.Next(); break; + case pos::PREV: it.Prev(); break; + case pos::FRONT: it.SeekToFirst(); break; + case pos::BACK: it.SeekToLast(); break; + default: + case pos::END: + { + it.SeekToLast(); + if(it.Valid()) + it.Next(); + + break; + } + } +} + +void +ircd::db::_seek_(rocksdb::Iterator &it, + const string_view &sv) +{ + _seek_(it, slice(sv)); +} + +void +ircd::db::_seek_(rocksdb::Iterator &it, + const rocksdb::Slice &sk) +{ + it.Seek(sk); +} + /////////////////////////////////////////////////////////////////////////////// // // Misc @@ -1527,7 +2133,11 @@ ircd::db::column_names(const std::string &path, try { std::vector ret; - throw_on_error(rocksdb::DB::ListColumnFamilies(opts, path, &ret)); + throw_on_error + { + rocksdb::DB::ListColumnFamilies(opts, path, &ret) + }; + return ret; } catch(const io_error &e) @@ -1544,25 +2154,39 @@ ircd::db::database::options::options(const database &d) } ircd::db::database::options::options(const database::column &c) -:options{rocksdb::ColumnFamilyOptions{c.d->d->GetOptions(c.handle)}} +:options { -} + rocksdb::ColumnFamilyOptions + { + c.d->d->GetOptions(c.handle.get()) + } +}{} ircd::db::database::options::options(const rocksdb::DBOptions &opts) { - throw_on_error{rocksdb::GetStringFromDBOptions(this, opts)}; + throw_on_error + { + rocksdb::GetStringFromDBOptions(this, opts) + }; } ircd::db::database::options::options(const rocksdb::ColumnFamilyOptions &opts) { - throw_on_error{rocksdb::GetStringFromColumnFamilyOptions(this, opts)}; + throw_on_error + { + rocksdb::GetStringFromColumnFamilyOptions(this, opts) + }; } ircd::db::database::options::operator rocksdb::PlainTableOptions() const { rocksdb::PlainTableOptions ret; - throw_on_error{rocksdb::GetPlainTableOptionsFromString(ret, *this, &ret)}; + throw_on_error + { + rocksdb::GetPlainTableOptionsFromString(ret, *this, &ret) + }; + return ret; } @@ -1570,7 +2194,11 @@ ircd::db::database::options::operator rocksdb::BlockBasedTableOptions() const { rocksdb::BlockBasedTableOptions ret; - throw_on_error{rocksdb::GetBlockBasedTableOptionsFromString(ret, *this, &ret)}; + throw_on_error + { + rocksdb::GetBlockBasedTableOptionsFromString(ret, *this, &ret) + }; + return ret; } @@ -1578,7 +2206,11 @@ ircd::db::database::options::operator rocksdb::ColumnFamilyOptions() const { rocksdb::ColumnFamilyOptions ret; - throw_on_error{rocksdb::GetColumnFamilyOptionsFromString(ret, *this, &ret)}; + throw_on_error + { + rocksdb::GetColumnFamilyOptionsFromString(ret, *this, &ret) + }; + return ret; } @@ -1586,7 +2218,11 @@ ircd::db::database::options::operator rocksdb::DBOptions() const { rocksdb::DBOptions ret; - throw_on_error{rocksdb::GetDBOptionsFromString(ret, *this, &ret)}; + throw_on_error + { + rocksdb::GetDBOptionsFromString(ret, *this, &ret) + }; + return ret; } @@ -1594,20 +2230,31 @@ ircd::db::database::options::operator rocksdb::Options() const { rocksdb::Options ret; - throw_on_error{rocksdb::GetOptionsFromString(ret, *this, &ret)}; + throw_on_error + { + rocksdb::GetOptionsFromString(ret, *this, &ret) + }; + return ret; } ircd::db::database::options::map::map(const options &o) { - throw_on_error{rocksdb::StringToMap(o, this)}; + throw_on_error + { + rocksdb::StringToMap(o, this) + }; } ircd::db::database::options::map::operator rocksdb::PlainTableOptions() const { rocksdb::PlainTableOptions ret; - throw_on_error{rocksdb::GetPlainTableOptionsFromMap(ret, *this, &ret)}; + throw_on_error + { + rocksdb::GetPlainTableOptionsFromMap(ret, *this, &ret) + }; + return ret; } @@ -1615,7 +2262,11 @@ ircd::db::database::options::map::operator rocksdb::BlockBasedTableOptions() const { rocksdb::BlockBasedTableOptions ret; - throw_on_error{rocksdb::GetBlockBasedTableOptionsFromMap(ret, *this, &ret)}; + throw_on_error + { + rocksdb::GetBlockBasedTableOptionsFromMap(ret, *this, &ret) + }; + return ret; } @@ -1623,7 +2274,11 @@ ircd::db::database::options::map::operator rocksdb::ColumnFamilyOptions() const { rocksdb::ColumnFamilyOptions ret; - throw_on_error{rocksdb::GetColumnFamilyOptionsFromMap(ret, *this, &ret)}; + throw_on_error + { + rocksdb::GetColumnFamilyOptionsFromMap(ret, *this, &ret) + }; + return ret; } @@ -1631,7 +2286,11 @@ ircd::db::database::options::map::operator rocksdb::DBOptions() const { rocksdb::DBOptions ret; - throw_on_error{rocksdb::GetDBOptionsFromMap(ret, *this, &ret)}; + throw_on_error + { + rocksdb::GetDBOptionsFromMap(ret, *this, &ret) + }; + return ret; } @@ -1644,6 +2303,8 @@ ircd::db::make_opts(const gopts &opts, if(iterator) ret.fill_cache = false; + else + ret.fill_cache = true; for(const auto &opt : opts) switch(opt.first) { @@ -1703,192 +2364,6 @@ ircd::db::make_opts(const sopts &opts) return ret; } -namespace ircd { -namespace db { - -void seek(rocksdb::Iterator &, const rocksdb::Slice &); -void seek(rocksdb::Iterator &, const string_view &); -void seek(rocksdb::Iterator &, const pos &); - -} // namespace db -} // namespace ircd - -std::vector -ircd::db::seek(database &d, - const gopts &gopts) -{ - using rocksdb::Iterator; - using rocksdb::ColumnFamilyHandle; - - const auto opts - { - make_opts(gopts, true) - }; - - std::vector iterators; - std::vector column(d.columns.size()); - std::transform(begin(d.columns), end(d.columns), begin(column), [] - (const auto &p) - { - return p.second.get(); - }); - - std::vector columns(column.size()); - std::transform(begin(column), end(column), begin(columns), [] - (const auto &ptr) - { - return ptr->handle; - }); - - throw_on_error - { - d.d->NewIterators(opts, columns, &iterators) - }; - - std::vector ret(iterators.size()); - for(size_t i(0); i < ret.size(); ++i) - { - std::unique_ptr it(iterators.at(i)); - ret[i] = std::make_pair(db::column{*column.at(i)}, std::move(it)); - } - - return ret; -} - -std::unique_ptr -ircd::db::seek(column &column, - const string_view &key, - const gopts &gopts) -{ - using rocksdb::Iterator; - - database &d(column); - database::column &c(column); - auto opts - { - make_opts(gopts, true) - }; - - // Perform a query which won't be allowed to do kernel IO - opts.read_tier = NON_BLOCKING; - - std::unique_ptr it(d.d->NewIterator(opts, c)); - seek(*it, key); - - if(it->status().IsIncomplete()) - { - // DB cache miss: reset the iterator to blocking mode and offload it - opts.read_tier = BLOCKING; - it.reset(d.d->NewIterator(opts, c)); - ctx::offload([&it, &key] - { - seek(*it, key); - }); - } - // else DB cache hit; no context switch; no thread switch; no kernel I/O; gg - - return std::move(it); -} - -template -void -ircd::db::seek(row &r, - const pos &p) -{ - ctx::offload([&r, &p] - { - std::for_each(begin(r.its), end(r.its), [&p] - (const auto &pair) - { - rocksdb::Iterator &it(*pair.second); - seek(it, p); - }); - }); -} - -void -ircd::db::seek(row &r, - const string_view &s) -{ - seek(r, s); -} - -template -void -ircd::db::seek(column::const_iterator &it, - const pos &p) -{ - const gopts &gopts(it); - database::column &c(it); - database &d(*c.d); - auto opts - { - make_opts(gopts, true) - }; - - // Start with a non-blocking query - if(!it.it || opts.read_tier == BLOCKING) - { - opts.read_tier = NON_BLOCKING; - it.it.reset(d.d->NewIterator(opts, c)); - } - - seek(*it.it, p); - if(it.it->status().IsIncomplete()) - { - // DB cache miss: reset the iterator to blocking mode and offload it - opts.read_tier = BLOCKING; - it.it.reset(d.d->NewIterator(opts, c)); - ctx::offload([&it, &p] - { - seek(*it.it, p); - }); - } -} - -void -ircd::db::seek(column::const_iterator &it, - const string_view &s) -{ - seek(it, s); -} - -void -ircd::db::seek(rocksdb::Iterator &it, - const pos &p) -{ - switch(p) - { - case pos::NEXT: it.Next(); break; - case pos::PREV: it.Prev(); break; - case pos::FRONT: it.SeekToFirst(); break; - case pos::BACK: it.SeekToLast(); break; - default: - case pos::END: - { - it.SeekToLast(); - if(it.Valid()) - it.Next(); - - break; - } - } -} - -void -ircd::db::seek(rocksdb::Iterator &it, - const string_view &sv) -{ - seek(it, slice(sv)); -} - -void -ircd::db::seek(rocksdb::Iterator &it, - const rocksdb::Slice &sk) -{ - it.Seek(sk); -} - void ircd::db::valid_equal_or_throw(const rocksdb::Iterator &it, const string_view &sv) @@ -1977,13 +2452,19 @@ ircd::db::path(const std::string &name) std::pair ircd::db::operator*(const rocksdb::Iterator &it) { - const auto &k(it.key()); - const auto &v(it.value()); - return - { - { k.data(), k.size() }, - { v.data(), v.size() } - }; + return { key(it), val(it) }; +} + +ircd::string_view +ircd::db::key(const rocksdb::Iterator &it) +{ + return slice(it.key()); +} + +ircd::string_view +ircd::db::val(const rocksdb::Iterator &it) +{ + return slice(it.value()); } rocksdb::Slice @@ -1992,6 +2473,12 @@ ircd::db::slice(const string_view &sv) return { sv.data(), sv.size() }; } +ircd::string_view +ircd::db::slice(const rocksdb::Slice &sk) +{ + return { sk.data(), sk.size() }; +} + const std::string & ircd::db::reflect(const rocksdb::Tickers &type) {