From 0e956dbaca79d66441658a641655953023d97c90 Mon Sep 17 00:00:00 2001 From: Jason Volk Date: Tue, 20 Aug 2019 19:44:00 -0700 Subject: [PATCH] ircd::m::dbs: Start a directory README; add some comments. --- include/ircd/m/dbs/README.md | 57 +++++++++++++++++++++++++++++++ include/ircd/m/dbs/dbs.h | 2 +- include/ircd/m/dbs/event_column.h | 8 +++++ 3 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 include/ircd/m/dbs/README.md diff --git a/include/ircd/m/dbs/README.md b/include/ircd/m/dbs/README.md new file mode 100644 index 000000000..4ce06fd1f --- /dev/null +++ b/include/ircd/m/dbs/README.md @@ -0,0 +1,57 @@ +# Database Schema + +This system provides local storage for all events and related metadata using +the `events` database. The database is divided into several columns. + +Writing to the database must only occur through the single `write()` call, +which operates using transactions. In fact, `write()` itself only builds +transactions and does not actually modify the database until the user commits +the transaction. + +Reading from the database can occur more directly by referencing the columns +and using the `ircd::db` APIs. + +``` +Note for casual developers: This is a low-level API. It is highly likely that what +you are looking for has a real interface somewhere in ircd::m. +``` + +### Column Overview + +There are two categories of columns: Direct event data, and indirect metadata. + +The only data stored by the server is in the form of matrix events in rooms. +No arbitrary application data is stored in the database. For example, there +is no "accounts column" storing some user account information; instead this +would be implemented as some matrix event with a type like `ircd.account` +and the `content` containing our arbitrary data. + +The only metadata stored in addition to the original event data optimizes and +enhances the original event data. Again, no arbitrary application data is +stored here. 
Everything stored here helps to facilitate the service of events +inside rooms, for any reasonable purpose, which we then build the application +layer on top of. + +#### Direct columns + +Direct data consists of original event JSON in addition to several direct +property columns. The fundamental event JSON is stored in `_event_json`. +Select properties from the original JSON are also stored in tuned columns +(see: event_column.h). + +The event_column(s) all store duplicate data from the original _event_json +but limited to a single specific property. The index key is an event_idx +(just like _event_json). These columns are useful for various optimizations +at the cost of the additional space consumed. + +When conducting a point lookup of an event property with m::get() or with +keys masked in m::event::fetch::opts, these columns handle the query iff +all desired properties can be satisfied from these columns; otherwise, +if a property is sought which does not have an active corresponding column +here, the _event_json is used transparently to satisfy the query. + +Cache and storage details here can be tuned specifically for each property. This +makes reading faster and cache footprints more compact, holding much larger +datasets without eviction; in addition to not disrupting the widely shared +_event_json cache during a simple iteration of one property for all events +on the server, etc. 
diff --git a/include/ircd/m/dbs/dbs.h b/include/ircd/m/dbs/dbs.h index ee6652a13..7880f0c67 100644 --- a/include/ircd/m/dbs/dbs.h +++ b/include/ircd/m/dbs/dbs.h @@ -46,7 +46,7 @@ namespace ircd::m::dbs::appendix #include "event_column.h" // event_idx => (direct value) #include "event_idx.h" // event_id => event_idx #include "event_json.h" // event_idx => (full JSON) -#include "event_refs.h" // eventidx | ref_type, event_idx +#include "event_refs.h" // event_idx | ref_type, event_idx #include "event_horizon.h" // event_id | event_idx #include "event_type.h" // type | event_idx #include "event_sender.h" // sender | event_idx || hostpart | localpart, event_idx diff --git a/include/ircd/m/dbs/event_column.h b/include/ircd/m/dbs/event_column.h index d00ffef2e..1bfc8e114 100644 --- a/include/ircd/m/dbs/event_column.h +++ b/include/ircd/m/dbs/event_column.h @@ -11,6 +11,11 @@ #pragma once #define HAVE_IRCD_M_DBS_EVENT_COLUMN_H +// These columns all store duplicate data from the original _event_json +// but limited to a single specific property. The index key is an event_idx +// (just like _event_json). These columns are useful for various optimizations +// at the cost of the additional space consumed. + namespace ircd::m::dbs { // Event property column max-count. The number of event columns may be @@ -20,6 +25,9 @@ namespace ircd::m::dbs event::size() }; + // There is one position in this array corresponding to each property + // in the m::event tuple, however, the db::column in this position may + // be default-initialized if this column is not used. extern std::array event_column; }