From 8296adb6c319e8f45c5c07c6e5a0740a72f50d46 Mon Sep 17 00:00:00 2001 From: Jason Volk Date: Wed, 31 Jan 2018 18:49:40 -0800 Subject: [PATCH] ircd::m::state: Try using a separate child array to store values at any level (partial). --- include/ircd/m/state.h | 19 +++++++++++------ ircd/m/state.cc | 48 +++++++++++++++++++++++++++++++++--------- modules/db/events.cc | 25 +++++++++------------- 3 files changed, 60 insertions(+), 32 deletions(-) diff --git a/include/ircd/m/state.h b/include/ircd/m/state.h index 8c4711e00..5e82793cf 100644 --- a/include/ircd/m/state.h +++ b/include/ircd/m/state.h @@ -19,17 +19,18 @@ namespace ircd::m::state using node_closure = std::function; using key_closure = std::function; - constexpr size_t ID_MAX_SZ { 64 }; - constexpr size_t KEY_MAX_SZ { 256 + 256 + 16 }; - constexpr size_t NODE_MAX_SZ { 4_KiB }; + constexpr size_t ID_MAX_SZ { 64 }; + constexpr size_t KEY_MAX_SZ { 256 + 256 + 16 }; + constexpr size_t VAL_MAX_SZ { 256 + 16 }; + constexpr size_t NODE_MAX_SZ { 4_KiB }; + constexpr int8_t MAX_HEIGHT { 16 }; // good for few mil at any degree :) int keycmp(const json::array &a, const json::array &b); json::array make_key(const mutable_buffer &out, const string_view &type, const string_view &state_key); void make_key(const string_view &type, const string_view &state_key, const key_closure &); - json::object make_node(const mutable_buffer &out, const json::array *const &keys, const size_t &kn, const string_view *const &vals, const size_t &vn); - json::object make_node(const mutable_buffer &out, const node &old, const size_t &pos, const json::array &key, const string_view &val); + json::object make_node(const mutable_buffer &out, const json::array *const &keys, const size_t &kn, const string_view *const &vals, const size_t &vn, const string_view *const &child, const size_t &cn); template string_view set_node(db::txn &txn, const mutable_buffer &id, args&&...); void get_node(db::column &, const string_view &id, const node_closure &); @@ -54,6 +55,7 @@ namespace ircd::m::state::name { constexpr const char *const k {"k"}; constexpr const char *const v {"v"}; + constexpr const char *const c {"c"}; } #pragma GCC diagnostic push @@ -62,16 +64,19 @@ struct ircd::m::state::node :json::tuple < json::property, - json::property + json::property, + json::property > { size_t keys() const; size_t vals() const; - size_t children() const; + size_t childs() const; json::array key(const size_t &) const; string_view val(const size_t &) const; + string_view child(const size_t &) const; + bool has_child(const size_t &) const; size_t find(const json::array &key) const; using super_type::tuple; diff --git a/ircd/m/state.cc b/ircd/m/state.cc index 4263962c9..84d71a07b 100644 --- a/ircd/m/state.cc +++ b/ircd/m/state.cc @@ -310,16 +310,19 @@ ircd::m::state::make_node(const mutable_buffer &out, /// Prints a node into the buffer `out` using the keys and vals arguments /// which must be pointers to arrays. Size of each array is specified in /// the following argument. Each array must have at least one element each. -/// the vals array can have one more element than the keys array if desired. +/// the chld array can have one more element than the keys array if desired. ircd::json::object ircd::m::state::make_node(const mutable_buffer &out, const json::array *const &keys_, const size_t &kn, const string_view *const &vals_, - const size_t &vn) + const size_t &vn, + const string_view *const &chld_, + const size_t &cn) { assert(kn > 0 && vn > 0); - assert(kn == vn || kn + 1 == vn); + assert(kn == vn); + assert(cn <= kn + 1); json::value keys[kn]; { @@ -333,11 +336,18 @@ ircd::m::state::make_node(const mutable_buffer &out, vals[i] = vals_[i]; }; + json::value chld[cn]; + { + for(size_t i(0); i < cn; ++i) + chld[i] = chld_[i]; + }; + json::iov iov; const json::iov::push push[] { { iov, { "k"_sv, { keys, kn } } }, { iov, { "v"_sv, { vals, vn } } }, + { iov, { "c"_sv, { chld, cn } } }, }; return { data(out), json::print(out, iov) }; @@ -427,12 +437,29 @@ const return ret; } +// Count values that actually lead to other nodes +bool +ircd::m::state::node::has_child(const size_t &pos) +const +{ + return !empty(child(pos)); +} + +ircd::string_view +ircd::m::state::node::child(const size_t &pos) +const +{ + const json::array children{json::get<"c"_>(*this, json::empty_array)}; + return unquote(children[pos]); +} + // Get value at position pos (throws out_of_range) ircd::string_view ircd::m::state::node::val(const size_t &pos) const { - return unquote(json::at<"v"_>(*this).at(pos)); + const json::array values{json::get<"v"_>(*this, json::empty_array)}; + return unquote(values[pos]); } // Get key at position pos (throws out_of_range) @@ -440,18 +467,19 @@ ircd::json::array ircd::m::state::node::key(const size_t &pos) const { - return json::at<"k"_>(*this).at(pos); + const json::array keys{json::get<"k"_>(*this, json::empty_array)}; + const json::array ret{keys[pos]}; + return ret; } -// Count values that actually lead to other nodes +// Count children in node size_t -ircd::m::state::node::children() +ircd::m::state::node::childs() const { size_t ret(0); - for(const auto &v : json::get<"v"_>(*this)) - if(!valid(id::EVENT, v)) - ++ret; + for(const auto &c : json::get<"c"_>(*this)) + ret += !empty(c); return ret; } diff --git a/modules/db/events.cc b/modules/db/events.cc index 48435fb83..f782077b1 100644 --- a/modules/db/events.cc +++ b/modules/db/events.cc @@ -819,8 +819,13 @@ const database::description events_description // "v": ; Value array // [ ; // "$14961836116kXQRA:matrix.org", ; Left accept - // "GFkS15QjKBKjxSZpz", ; Center child - // "HLacMRucdEPdJrzBz" ; Right child + // "$15018692261xPQDB:matrix.org", ; Right accept + // ] ; + // "c": ; Child array + // [ ; + // "nPKN9twTF9a8k5dD7AApFcaraHTX", ; Left child + // "PcxAAACvkvyUMz19AZcCfrC3S84s", ; Center child + // "2jVYKIMKErJ6w6BLMhfVjsXearhB", ; Right child // ] ; // } ; // @@ -829,32 +834,22 @@ const database::description events_description // Elements are ordered based on type+state_key lexical sort. The type // and the state_key strings are literally concatenated to this effect. // They're not hashed. We can have some more control over data locality - // this way. There is no prefix/trie keying yet, but that should probably - // happen. Any number of values may be in a key array, not just type+ + // this way. Any number of values may be in a key array, not just type+ // state_key. The concatenation involves the string with its surrounding // quotes as to not allow the user to mess about conflicting values. // ``` // "m.room.member""@jzk" > "m.room.create""" // ``` - // The values are either event MXID's or some identifier of a child node. - // The event MXID is leaf-data, no child node will be found there. The - // common tree traversal rules then apply: if the query value is less - // than the first element key, val[0] is followed; if compares between the - // first and second key, then val[1] is followed; if it compares greater - // than the last key, the last val is followed. - // // Unlike traditional trees of such variety, the number of elements is not // really well defined and not even fixed. There just has to be one more - // value in the "val" list than there are keys in the "key" list. To make + // value in the "child" list than there are keys in the "key" list. To make // this structure efficient we have to figure out a good number of // children per node, and that might even be a contextual decision. The // more children, the less depth to the query, but at the cost of a larger // node size. A larger node in this system isn't just relevant to // retrieval, but consider nodes are also immutable. Changes to the tree // create new nodes for each changed path so the old nodes can still - // represent the old state. Repacking nodes to represent slightly different - // states within the same node is a possible exercise left for the future. - // + // represent the old state. state_node, };