From 8296adb6c319e8f45c5c07c6e5a0740a72f50d46 Mon Sep 17 00:00:00 2001
From: Jason Volk <jason@zemos.net>
Date: Wed, 31 Jan 2018 18:49:40 -0800
Subject: [PATCH] ircd::m::state: Try using a separate child array to store
 values at any level (partial).

---
 include/ircd/m/state.h | 19 +++++++++++------
 ircd/m/state.cc        | 48 +++++++++++++++++++++++++++++++++---------
 modules/db/events.cc   | 25 +++++++++-------------
 3 files changed, 60 insertions(+), 32 deletions(-)
diff --git a/include/ircd/m/state.h b/include/ircd/m/state.h
index 8c4711e00..5e82793cf 100644
--- a/include/ircd/m/state.h
+++ b/include/ircd/m/state.h
@@ -19,17 +19,18 @@ namespace ircd::m::state
 	using node_closure = std::function<void (const json::object &)>;
 	using key_closure = std::function<void (const json::array &)>;
 
-	constexpr size_t ID_MAX_SZ    { 64               };
-	constexpr size_t KEY_MAX_SZ   { 256 + 256 + 16   };
-	constexpr size_t NODE_MAX_SZ  { 4_KiB            };
+	constexpr size_t ID_MAX_SZ { 64 };
+	constexpr size_t KEY_MAX_SZ { 256 + 256 + 16 };
+	constexpr size_t VAL_MAX_SZ { 256 + 16 };
+	constexpr size_t NODE_MAX_SZ { 4_KiB };
+	constexpr int8_t MAX_HEIGHT { 16 }; // good for few mil at any degree :)
 
 	int keycmp(const json::array &a, const json::array &b);
 
 	json::array make_key(const mutable_buffer &out, const string_view &type, const string_view &state_key);
 	void make_key(const string_view &type, const string_view &state_key, const key_closure &);
 
-	json::object make_node(const mutable_buffer &out, const json::array *const &keys, const size_t &kn, const string_view *const &vals, const size_t &vn);
-	json::object make_node(const mutable_buffer &out, const node &old, const size_t &pos, const json::array &key, const string_view &val);
+	json::object make_node(const mutable_buffer &out, const json::array *const &keys, const size_t &kn, const string_view *const &vals, const size_t &vn, const string_view *const &child, const size_t &cn);
 	template<class... args> string_view set_node(db::txn &txn, const mutable_buffer &id, args&&...);
 
 	void get_node(db::column &, const string_view &id, const node_closure &);
@@ -54,6 +55,7 @@ namespace ircd::m::state::name
 {
 	constexpr const char *const k {"k"};
 	constexpr const char *const v {"v"};
+	constexpr const char *const c {"c"};
 }
 
 #pragma GCC diagnostic push
@@ -62,16 +64,19 @@ struct ircd::m::state::node
 :json::tuple
 <
 	json::property<name::k, json::array>,
-	json::property<name::v, json::array>
+	json::property<name::v, json::array>,
+	json::property<name::c, json::array>
 >
 {
 	size_t keys() const;
 	size_t vals() const;
-	size_t children() const;
+	size_t childs() const;
 
 	json::array key(const size_t &) const;
 	string_view val(const size_t &) const;
+	string_view child(const size_t &) const;
 
+	bool has_child(const size_t &) const;
 	size_t find(const json::array &key) const;
 
 	using super_type::tuple;
diff --git a/ircd/m/state.cc b/ircd/m/state.cc
index 4263962c9..84d71a07b 100644
--- a/ircd/m/state.cc
+++ b/ircd/m/state.cc
@@ -310,16 +310,19 @@ ircd::m::state::make_node(const mutable_buffer &out,
 /// Prints a node into the buffer `out` using the keys and vals arguments
 /// which must be pointers to arrays. Size of each array is specified in
 /// the following argument. Each array must have at least one element each.
-/// the vals array can have one more element than the keys array if desired.
+/// The chld array may hold at most one more element than the keys array.
 ircd::json::object
 ircd::m::state::make_node(const mutable_buffer &out,
                           const json::array *const &keys_,
                           const size_t &kn,
                           const string_view *const &vals_,
-                          const size_t &vn)
+                          const size_t &vn,
+                          const string_view *const &chld_,
+                          const size_t &cn)
 {
 	assert(kn > 0 && vn > 0);
-	assert(kn == vn || kn + 1 == vn);
+	assert(kn == vn);
+	assert(cn <= kn + 1);
 
 	json::value keys[kn];
 	{
@@ -333,11 +336,18 @@ ircd::m::state::make_node(const mutable_buffer &out,
 			vals[i] = vals_[i];
 	};
 
+	json::value chld[cn];
+	{
+		for(size_t i(0); i < cn; ++i)
+			chld[i] = chld_[i];
+	};
+
 	json::iov iov;
 	const json::iov::push push[]
 	{
 		{ iov, { "k"_sv, { keys, kn } } },
 		{ iov, { "v"_sv, { vals, vn } } },
+		{ iov, { "c"_sv, { chld, cn } } },
 	};
 
 	return { data(out), json::print(out, iov) };
@@ -427,12 +437,29 @@ const
 	return ret;
 }
 
+// Whether child(pos) yields a non-empty child id at position pos
+bool
+ircd::m::state::node::has_child(const size_t &pos)
+const
+{
+	return !empty(child(pos));
+}
+
+ircd::string_view
+ircd::m::state::node::child(const size_t &pos) // Get child id at position pos
+const
+{
+	const json::array children{json::get<"c"_>(*this, json::empty_array)}; // presumably empty_array is the fallback when "c" is unset - confirm
+	return unquote(children[pos]); // NOTE(review): operator[] not at(); out-of-range behavior depends on json::array - confirm
+}
+
 // Get value at position pos (throws out_of_range)
 ircd::string_view
 ircd::m::state::node::val(const size_t &pos)
 const
 {
-	return unquote(json::at<"v"_>(*this).at(pos));
+	const json::array values{json::get<"v"_>(*this, json::empty_array)};
+	return unquote(values[pos]);
 }
 
 // Get key at position pos (throws out_of_range)
@@ -440,18 +467,19 @@ ircd::json::array
 ircd::m::state::node::key(const size_t &pos)
 const
 {
-	return json::at<"k"_>(*this).at(pos);
+	const json::array keys{json::get<"k"_>(*this, json::empty_array)};
+	const json::array ret{keys[pos]};
+	return ret;
 }
 
-// Count values that actually lead to other nodes
+// Count the non-empty entries in the child ("c") array
 size_t
-ircd::m::state::node::children()
+ircd::m::state::node::childs()
 const
 {
 	size_t ret(0);
-	for(const auto &v : json::get<"v"_>(*this))
-		if(!valid(id::EVENT, v))
-			++ret;
+	for(const auto &c : json::get<"c"_>(*this))
+		ret += !empty(c);
 
 	return ret;
 }
diff --git a/modules/db/events.cc b/modules/db/events.cc
index 48435fb83..f782077b1 100644
--- a/modules/db/events.cc
+++ b/modules/db/events.cc
@@ -819,8 +819,13 @@ const database::description events_description
 	//     "v":                                         ; Value array
 	//     [                                            ;
 	//         "$14961836116kXQRA:matrix.org",          ; Left accept
-	//         "GFkS15QjKBKjxSZpz",                     ; Center child
-	//         "HLacMRucdEPdJrzBz"                      ; Right child
+	//         "$15018692261xPQDB:matrix.org"           ; Right accept
+	//     ],                                           ;
+	//     "c":                                         ; Child array
+	//     [                                            ;
+	//         "nPKN9twTF9a8k5dD7AApFcaraHTX",          ; Left child
+	//         "PcxAAACvkvyUMz19AZcCfrC3S84s",          ; Center child
+	//         "2jVYKIMKErJ6w6BLMhfVjsXearhB"           ; Right child
 	//     ]                                            ;
 	// }                                                ;
 	//
@@ -829,32 +834,22 @@ const database::description events_description
 	// Elements are ordered based on type+state_key lexical sort. The type
 	// and the state_key strings are literally concatenated to this effect.
 	// They're not hashed. We can have some more control over data locality
-	// this way. There is no prefix/trie keying yet, but that should probably
-	// happen. Any number of values may be in a key array, not just type+
+	// this way. Any number of values may be in a key array, not just type+
 	// state_key. The concatenation involves the string with its surrounding
 	// quotes as to not allow the user to mess about conflicting values.
 	// ```
 	// "m.room.member""@jzk" > "m.room.create"""
 	// ```
-	// The values are either event MXID's or some identifier of a child node.
-	// The event MXID is leaf-data, no child node will be found there. The
-	// common tree traversal rules then apply: if the query value is less
-	// than the first element key, val[0] is followed; if compares between the
-	// first and second key, then val[1] is followed; if it compares greater
-	// than the last key, the last val is followed.
-	//
 	// Unlike traditional trees of such variety, the number of elements is not
 	// really well defined and not even fixed. There just has to be one more
-	// value in the "val" list than there are keys in the "key" list. To make
+	// value in the "child" list than there are keys in the "key" list. To make
 	// this structure efficient we have to figure out a good number of
 	// children per node, and that might even be a contextual decision. The
 	// more children, the less depth to the query, but at the cost of a larger
 	// node size. A larger node in this system isn't just relevant to
 	// retrieval, but consider nodes are also immutable. Changes to the tree
 	// create new nodes for each changed path so the old nodes can still
-	// represent the old state. Repacking nodes to represent slightly different
-	// states within the same node is a possible exercise left for the future.
-	//
+	// represent the old state.
 	state_node,
 };