diff --git a/include/ircd/m/state.h b/include/ircd/m/state.h
index ee59cfc85..42d483ca5 100644
--- a/include/ircd/m/state.h
+++ b/include/ircd/m/state.h
@@ -60,6 +60,50 @@ namespace ircd::m::state::name
 
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wsubobject-linkage"
+
+/// Format for node: Node is plaintext and not binary at this time. In fact,
+/// *evil chuckle*, node might as well be JSON and can easily become content
+/// of another event sent to other rooms over network *snorts*. (important:
+/// database is well compressed).
+///
+/// {                                                 ;
+///     "k":                                          ; Key array
+///     [                                             ;
+///         ["m.room.member", "@ar4an:matrix.org"],   ; Left key
+///         ["m.room.member", "@jzk:matrix.org"]      ; Right key
+///     ],                                            ;
+///     "v":                                          ; Value array
+///     [                                             ;
+///         "$14961836116kXQRA:matrix.org",           ; Left accept
+///         "$15018692261xPQDB:matrix.org"            ; Right accept
+///     ],                                            ;
+///     "c":                                          ; Child array
+///     [                                             ;
+///         "nPKN9twTF9a8k5dD7AApFcaraHTX",           ; Left child
+///         "PcxAAACvkvyUMz19AZcCfrC3S84s",           ; Center child
+///         "2jVYKIMKErJ6w6BLMhfVjsXearhB"            ; Right child
+///     ]                                             ;
+/// }                                                 ;
+///
+/// Elements are ordered based on type+state_key lexical sort. The type
+/// and the state_key strings are literally concatenated to this effect.
+/// They're not hashed. We can have some more control over data locality
+/// this way. Any number of values may be in a key array, not just type+
+/// state_key. The concatenation involves the string with its surrounding
+/// quotes as to not allow the user to mess about conflicting values.
+/// ```
+/// "m.room.member""@jzk" > "m.room.create"""
+/// ```
+/// Unlike traditional trees of such variety, the number of elements is not
+/// really well defined and not even fixed. There just has to be one more
+/// value in the "child" list than there are keys in the "key" list. To make
+/// this structure efficient we have to figure out a good number of
+/// children per node, and that might even be a contextual decision. The
+/// more children, the less depth to the query, but at the cost of a larger
+/// node size. A larger node in this system isn't just relevant to
+/// retrieval, but consider nodes are also immutable. Changes to the tree
+/// create new nodes for each changed path so the old nodes can still
+/// represent the old state.
 struct ircd::m::state::node
 :json::tuple
 <
@@ -91,6 +135,32 @@ struct ircd::m::state::node
 };
 #pragma GCC diagnostic pop
 
+/// Mutable working copy of a node. The constructor unpacks a node's keys,
+/// values and children into the fixed-size arrays below, recording how many
+/// of each were read; the arrays can then be edited in place and serialized
+/// again with write().
+struct ircd::m::state::node::rep
+{
+	std::array keys;
+	std::array vals;
+	std::array chld;
+	size_t kn {0};    // number of keys held
+	size_t vn {0};    // number of values held
+	size_t cn {0};    // number of children held
+
+	bool full() const;
+	bool overfull() const;
+	size_t find(const json::array &key) const;
+
+	void shr(const size_t &pos);
+
+	json::object write(const mutable_buffer &out);
+	string_view write(db::txn &, const mutable_buffer &id);
+
+	rep(const node &node);
+	rep() = default;
+};
+
 static_assert
 (
 	ircd::m::state::NODE_MAX_KEY == ircd::m::state::NODE_MAX_VAL
diff --git a/ircd/m/state.cc b/ircd/m/state.cc
index fd2ce1bdd..f0d078858 100644
--- a/ircd/m/state.cc
+++ b/ircd/m/state.cc
@@ -10,122 +10,6 @@
 
 #include
 
-struct ircd::m::state::node::rep
-{
-	std::array keys;
-	std::array vals;
-	std::array chld;
-	size_t kn {0};
-	size_t vn {0};
-	size_t cn {0};
-
-	bool full() const;
-	bool overfull() const;
-	size_t find(const json::array &key) const;
-
-	void shr(const size_t &pos);
-
-	json::object write(const mutable_buffer &out);
-	string_view write(db::txn &, const mutable_buffer &id);
-
-	rep(const node &node);
-	rep() = default;
-};
-
-ircd::m::state::node::rep::rep(const node &node)
-:kn{node.keys(keys.data(), keys.size())}
-,vn{node.vals(vals.data(), vals.size())}
-,cn{node.childs(chld.data(), chld.size())}
-{
-}
-
-ircd::string_view
-ircd::m::state::node::rep::write(db::txn &txn,
-                                 const mutable_buffer &idbuf)
-{
-	thread_local char buf[NODE_MAX_SZ];
-	return set_node(txn, idbuf, write(buf));
-}
-
-ircd::json::object
-ircd::m::state::node::rep::write(const mutable_buffer &out)
-{
-	assert(kn == vn);
-	assert(cn <= kn + 1);
-	assert(kn > 0 && vn > 0);
-
-	assert(kn <= NODE_MAX_KEY);
-	assert(vn <= NODE_MAX_VAL);
-	assert(cn <= NODE_MAX_DEG);
-
-	json::value keys[kn];
-	{
-		for(size_t i(0); i < kn; ++i)
-			keys[i] = this->keys[i];
-	}
-
-	json::value vals[vn];
-	{
-		for(size_t i(0); i < vn; ++i)
-			vals[i] = this->vals[i];
-	};
-
-	json::value chld[cn];
-	{
-		for(size_t i(0); i < cn; ++i)
-			chld[i] = this->chld[i];
-	};
-
-	json::iov iov;
-	const json::iov::push push[]
-	{
-		{ iov, { "k"_sv, { keys, kn } } },
-		{ iov, { "v"_sv, { vals, vn } } },
-		{ iov, { "c"_sv, { chld, cn } } },
-	};
-
-	return { data(out), json::print(out, iov) };
-}
-
-/// Shift right.
-void
-ircd::m::state::node::rep::shr(const size_t &pos)
-{
-	std::copy_backward(begin(keys) + pos, begin(keys) + kn, begin(keys) + kn + 1);
-	std::copy_backward(begin(vals) + pos, begin(vals) + vn, begin(vals) + vn + 1);
-	std::copy_backward(begin(chld) + pos, begin(chld) + cn, begin(chld) + cn + 1);
-}
-
-size_t
-ircd::m::state::node::rep::find(const json::array &parts)
-const
-{
-	size_t i{0};
-	for(; i < kn; ++i)
-		if(keycmp(parts, keys[i]) <= 0)
-			return i;
-		else
-			++i;
-
-	return i;
-}
-
-bool
-ircd::m::state::node::rep::overfull()
-const
-{
-	assert(kn == vn);
-	return kn > NODE_MAX_KEY;
-}
-
-bool
-ircd::m::state::node::rep::full()
-const
-{
-	assert(kn == vn);
-	return kn >= NODE_MAX_KEY;
-}
-
 void
 ircd::m::state::append_nodes(db::txn &txn,
                              const event &event)
@@ -199,8 +83,6 @@ inserter(int8_t &height,
 	node::rep rep{node};
 	const auto pos{node.find(key)};
 
-	std::cout << int(height) << " " << pos << " " << node << " <---- " << key << std::endl;
-
 	if(keycmp(node.key(pos), key) == 0)
 	{
 		rep.keys[pos] = key;
@@ -283,9 +165,6 @@ inserter(int8_t &height,
 		return rep.write(txn, idbuf);
 	}
 
-	std::cout << "INTEGRATE PUSH T: " << child_id << std::endl;
-	std::cout << "INTEGRATE PUSH U: " << node << std::endl;
-
 	rep.shr(pos);
 	rep.keys[pos] = key;
 	++rep.kn;
@@ -575,6 +454,117 @@ ircd::m::state::keycmp(const json::array &a,
 	                1;
 }
 
+//
+// rep
+//
+
+/// Unpacks the keys, values and children of `node` into the local arrays;
+/// the counts returned by the node's accessors become kn, vn and cn.
+ircd::m::state::node::rep::rep(const node &node)
+:kn{node.keys(keys.data(), keys.size())}
+,vn{node.vals(vals.data(), vals.size())}
+,cn{node.childs(chld.data(), chld.size())}
+{
+}
+
+/// Serializes this rep into a thread-local scratch buffer of NODE_MAX_SZ
+/// bytes and passes the result to set_node() together with `txn` and
+/// `idbuf`. Returns what set_node() returns.
+ircd::string_view
+ircd::m::state::node::rep::write(db::txn &txn,
+                                 const mutable_buffer &idbuf)
+{
+	thread_local char buf[NODE_MAX_SZ];
+	return set_node(txn, idbuf, write(buf));
+}
+
+/// Prints this rep into `out` as a JSON object with the members "k" (keys),
+/// "v" (values) and "c" (children). The counts are expected to satisfy the
+/// invariants asserted at the top of the function.
+ircd::json::object
+ircd::m::state::node::rep::write(const mutable_buffer &out)
+{
+	assert(kn == vn);
+	assert(cn <= kn + 1);
+	assert(kn > 0 && vn > 0);
+
+	assert(kn <= NODE_MAX_KEY);
+	assert(vn <= NODE_MAX_VAL);
+	assert(cn <= NODE_MAX_DEG);
+
+	json::value keys[kn];
+	{
+		for(size_t i(0); i < kn; ++i)
+			keys[i] = this->keys[i];
+	}
+
+	json::value vals[vn];
+	{
+		for(size_t i(0); i < vn; ++i)
+			vals[i] = this->vals[i];
+	};
+
+	json::value chld[cn];
+	{
+		for(size_t i(0); i < cn; ++i)
+			chld[i] = this->chld[i];
+	};
+
+	json::iov iov;
+	const json::iov::push push[]
+	{
+		{ iov, { "k"_sv, { keys, kn } } },
+		{ iov, { "v"_sv, { vals, vn } } },
+		{ iov, { "c"_sv, { chld, cn } } },
+	};
+
+	return { data(out), json::print(out, iov) };
+}
+
+/// Shift right. Moves the elements from `pos` to the current end of each
+/// array up by one slot. The counts are left untouched; the caller is
+/// expected to fill the slot at `pos` and increment them itself.
+void
+ircd::m::state::node::rep::shr(const size_t &pos)
+{
+	std::copy_backward(begin(keys) + pos, begin(keys) + kn, begin(keys) + kn + 1);
+	std::copy_backward(begin(vals) + pos, begin(vals) + vn, begin(vals) + vn + 1);
+	std::copy_backward(begin(chld) + pos, begin(chld) + cn, begin(chld) + cn + 1);
+}
+
+/// Returns the index of the first key that `parts` compares less than or
+/// equal to, or kn if `parts` is greater than every key. The index is only
+/// advanced by the loop header so that every key gets tested.
+size_t
+ircd::m::state::node::rep::find(const json::array &parts)
+const
+{
+	size_t i{0};
+	for(; i < kn; ++i)
+		if(keycmp(parts, keys[i]) <= 0)
+			return i;
+
+	return i;
+}
+
+/// True when the rep holds more than NODE_MAX_KEY keys.
+bool
+ircd::m::state::node::rep::overfull()
+const
+{
+	assert(kn == vn);
+	return kn > NODE_MAX_KEY;
+}
+
+/// True when the rep holds NODE_MAX_KEY keys or more.
+bool
+ircd::m::state::node::rep::full()
+const
+{
+	assert(kn == vn);
+	return kn >= NODE_MAX_KEY;
+}
+
 //
 // node
 //
@@ -605,8 +595,9 @@ const
 /// and argument compares less than both, 0 is returned; equal to key[0],
 /// 0 is returned; greater than key[0] and less than or equal to key[1],
 /// 1 is returned; greater than both: 2 is returned. Note that there can
-/// be one more vals() than keys() in a node (this is usually a "full node")
-/// but there might not be, and the returned pos might be out of range.
+/// be one more childs() than keys() in a node (this is usually a "full
+/// node") but there might not be, and the returned pos might be out of
+/// range.
 size_t
 ircd::m::state::node::find(const json::array &parts)
 const
diff --git a/modules/db/events.cc b/modules/db/events.cc
index f782077b1..392dd5c1d 100644
--- a/modules/db/events.cc
+++ b/modules/db/events.cc
@@ -805,51 +805,6 @@ const database::description events_description
 
 	// (state tree node id) => (state tree node)
 	//
-	// Format for node: Node is plaintext and not binary at this time. In fact,
-	// *evil chuckle*, node might as well be JSON and can easily become content
-	// of another event sent to other rooms over network *snorts*. (important:
-	// database is well compressed).
-	//
-	// {                                                 ;
-	//     "k":                                          ; Key array
-	//     [                                             ;
-	//         ["m.room.member", "@ar4an"],              ; Left key
-	//         ["m.room.member", "@jzk"]                 ; Right key
-	//     ],                                            ;
-	//     "v":                                          ; Value array
-	//     [                                             ;
-	//         "$14961836116kXQRA:matrix.org",           ; Left accept
-	//         "$15018692261xPQDB:matrix.org",           ; Right accept
-	//     ]                                             ;
-	//     "c":                                          ; Child array
-	//     [                                             ;
-	//         "nPKN9twTF9a8k5dD7AApFcaraHTX",           ; Left child
-	//         "PcxAAACvkvyUMz19AZcCfrC3S84s",           ; Center child
-	//         "2jVYKIMKErJ6w6BLMhfVjsXearhB",           ; Right child
-	//     ]                                             ;
-	// }                                                 ;
-	//
-	// (note: actual JSON used is canonical and spaceless)
-	//
-	// Elements are ordered based on type+state_key lexical sort. The type
-	// and the state_key strings are literally concatenated to this effect.
-	// They're not hashed. We can have some more control over data locality
-	// this way. Any number of values may be in a key array, not just type+
-	// state_key. The concatenation involves the string with its surrounding
-	// quotes as to not allow the user to mess about conflicting values.
-	// ```
-	// "m.room.member""@jzk" > "m.room.create"""
-	// ```
-	// Unlike traditional trees of such variety, the number of elements is not
-	// really well defined and not even fixed. There just has to be one more
-	// value in the "child" list than there are keys in the "key" list. To make
-	// this structure efficient we have to figure out a good number of
-	// children per node, and that might even be a contextual decision. The
-	// more children, the less depth to the query, but at the cost of a larger
-	// node size. A larger node in this system isn't just relevant to
-	// retrieval, but consider nodes are also immutable. Changes to the tree
-	// create new nodes for each changed path so the old nodes can still
-	// represent the old state.
 	state_node,
 };
 