mirror of
https://github.com/matrix-construct/construct
synced 2025-01-14 00:34:18 +01:00
fixup! ircd::db: Develop object-store out of db system.
This commit is contained in:
parent
b07169dd69
commit
a2625b9f4b
5 changed files with 141 additions and 140 deletions
|
@ -1,10 +1,10 @@
|
|||
IRCd Database
|
||||
## IRCd Database
|
||||
|
||||
IRCd's database is presented here primarily as a persistent Object store.
|
||||
In other words, the structure presented by the database can be represented
|
||||
with JSON. This is built from the primitives of `column`s, `row`s and `cell`s.
|
||||
with JSON. This is built from the primitives of `column`, `row` and `cell`.
|
||||
|
||||
Columns:
|
||||
#### Columns
|
||||
While a simple key-value store could naively store a JSON document as a textual
|
||||
value, we provide additional structure schematized before opening a database:
|
||||
Every member of a JSON object is a `column` in this database. To address members
|
||||
|
@ -12,14 +12,14 @@ within nested objects, we specify a `column` with a "foo.bar.baz" path syntax. T
|
|||
puts all columns at the same level in our code, even though they may represent
|
||||
deeply nested values.
|
||||
|
||||
Rows:
|
||||
#### Rows
|
||||
Since `columns` are technically independent key-value stores (they have their own
|
||||
index), when an index key is the same between columns we call this a `row`. For basic
|
||||
object storage the schema is such that we use the same keys between all columns. For
|
||||
example, an index would be a username in a user database. The user database itself
|
||||
takes the form of a single JSON object and any member lookup happens on a user's row.
|
||||
|
||||
Cells:
|
||||
#### Cells
|
||||
A `cell` is a single value in a `column` indexed by a key that should be able to form
|
||||
a `row` between columns. Consider the following near-json expression:
|
||||
|
||||
|
@ -29,6 +29,7 @@ In the users database, we find the `column` "password" and the `row` for "root"
|
|||
set that `cell` to "foobar"
|
||||
|
||||
Consider these expressions for objects at some depth:
|
||||
|
||||
users["root"] = {"password.plaintext", "foobar"};
|
||||
users["root"] = {"password", {"plaintext", "foobar"}};
|
||||
|
||||
|
@ -36,7 +37,7 @@ The column is always found as "password.plaintext". We find it (and can iterate
|
|||
if it were an object) by string-manipulating these full paths which all sit in a single map
|
||||
and are always open, even if the cell is empty for some row.
|
||||
|
||||
Important notes:
|
||||
### Important notes
|
||||
|
||||
!!!
|
||||
The database system is plugged into the userspace context system to facilitate IO. This means
|
||||
|
|
|
@ -23,51 +23,18 @@
|
|||
#pragma once
|
||||
#define HAVE_IRCD_DB_COLUMN_H
|
||||
|
||||
namespace ircd {
|
||||
namespace db {
|
||||
|
||||
// Columns add the ability to run multiple LevelDB's in synchrony under the same database
|
||||
// (directory). Each column is a fully distinct key/value store; they are merely joined
|
||||
// for consistency.
|
||||
//
|
||||
// [GET] may be posted to a separate thread which incurs the time of IO while the calling
|
||||
// ircd::context yields.
|
||||
//
|
||||
// [SET] usually occur without yielding your context because the DB is write-log oriented.
|
||||
//
|
||||
|
||||
namespace ircd {
|
||||
namespace db {
|
||||
|
||||
enum class set
|
||||
{
|
||||
FSYNC, // Uses kernel filesystem synchronization after write (slow)
|
||||
NO_JOURNAL, // Write Ahead Log (WAL) for some crash recovery
|
||||
MISSING_COLUMNS // No exception thrown when writing to a deleted column family
|
||||
};
|
||||
|
||||
struct sopts
|
||||
:optlist<set>
|
||||
{
|
||||
template<class... list> sopts(list&&... l): optlist<set>{std::forward<list>(l)...} {}
|
||||
};
|
||||
|
||||
enum class get
|
||||
{
|
||||
PIN, // Keep iter data in memory for iter lifetime (good for lots of ++/--)
|
||||
CACHE, // Update the cache (CACHE is default for non-iterator operations)
|
||||
NO_CACHE, // Do not update the cache (NO_CACHE is default for iterators)
|
||||
NO_SNAPSHOT, // This iterator will have the latest data (tailing)
|
||||
NO_CHECKSUM, // Integrity of data will be checked unless this is specified
|
||||
READAHEAD, // Pair with a size in bytes for prefetching additional data
|
||||
};
|
||||
|
||||
struct gopts
|
||||
:optlist<get>
|
||||
{
|
||||
database::snapshot snapshot;
|
||||
|
||||
template<class... list> gopts(list&&... l): optlist<get>{std::forward<list>(l)...} {}
|
||||
};
|
||||
|
||||
// Columns add the ability to run multiple LevelDB's in synchrony under the same database
|
||||
// (directory). Each column is a fully distinct key/value store; they are merely joined
|
||||
// for consistency.
|
||||
//
|
||||
struct column
|
||||
{
|
||||
struct const_iterator;
|
||||
|
@ -82,19 +49,17 @@ struct column
|
|||
protected:
|
||||
using ColumnFamilyHandle = rocksdb::ColumnFamilyHandle;
|
||||
|
||||
std::shared_ptr<database> d;
|
||||
database::column *c;
|
||||
|
||||
public:
|
||||
operator const database &() const { return *d; }
|
||||
operator const database &() const { return database::get(*c); }
|
||||
operator const database::column &() const { return *c; }
|
||||
operator std::shared_ptr<database>() const { return d; }
|
||||
|
||||
operator database &() { return *d; }
|
||||
operator database &() { return database::get(*c); }
|
||||
operator database::column &() { return *c; }
|
||||
|
||||
operator bool() const { return bool(d); }
|
||||
bool operator!() const { return !d; }
|
||||
operator bool() const { return bool(c); }
|
||||
bool operator!() const { return !c; }
|
||||
|
||||
// [GET] Iterations
|
||||
const_iterator cbegin(const gopts & = {});
|
||||
|
@ -105,9 +70,6 @@ struct column
|
|||
const_iterator lower_bound(const string_view &key, const gopts & = {});
|
||||
const_iterator upper_bound(const string_view &key, const gopts & = {});
|
||||
|
||||
// [GET] Tests if key exists
|
||||
bool has(const string_view &key, const gopts & = {});
|
||||
|
||||
// [GET] Perform a get into a closure. This offers a reference to the data with zero-copy.
|
||||
void operator()(const string_view &key, const view_closure &func, const gopts & = {});
|
||||
void operator()(const string_view &key, const gopts &, const view_closure &func);
|
||||
|
@ -117,13 +79,11 @@ struct column
|
|||
void operator()(const std::initializer_list<delta> &, const sopts & = {});
|
||||
void operator()(const op &, const string_view &key, const string_view &val = {}, const sopts & = {});
|
||||
|
||||
// Flush memory tables to disk (this column only).
|
||||
void flush(const bool &blocking = false);
|
||||
|
||||
column(std::shared_ptr<database>, database::column &);
|
||||
column(database &, database::column &);
|
||||
column(database &, const string_view &column);
|
||||
column(database::column &);
|
||||
column(database::column &c)
|
||||
:c{&c}
|
||||
{}
|
||||
|
||||
column() = default;
|
||||
};
|
||||
|
||||
|
@ -138,6 +98,9 @@ const std::string &name(const column &);
|
|||
size_t file_count(column &);
|
||||
size_t bytes(column &);
|
||||
|
||||
// [GET] Tests if key exists
|
||||
bool has(column &, const string_view &key, const gopts & = {});
|
||||
|
||||
// [GET] Convenience functions to copy data into your buffer.
|
||||
// The signed char buffer is null terminated; the unsigned is not.
|
||||
size_t read(column &, const string_view &key, uint8_t *const &buf, const size_t &max, const gopts & = {});
|
||||
|
@ -151,5 +114,25 @@ void write(column &, const string_view &key, const uint8_t *const &buf, const si
|
|||
// [SET] Remove data from the db. not_found is never thrown.
|
||||
void del(column &, const string_view &key, const sopts & = {});
|
||||
|
||||
// [SET] Flush memory tables to disk (this column only).
|
||||
void flush(column &, const bool &blocking = false);
|
||||
|
||||
} // namespace db
|
||||
} // namespace ircd
|
||||
|
||||
// Construct a column handle by name: looks `column_name` up in `d.columns`
// and delegates to the column(database::column &) constructor with the
// dereferenced entry.
//
// The whole constructor is a function-try-block. If `at()` throws
// std::out_of_range the failure is logged below; because this is a
// constructor, the language rethrows the exception automatically when the
// handler finishes, so the caller still receives std::out_of_range.
inline
|
||||
ircd::db::column::column(database &d,
|
||||
const string_view &column_name)
|
||||
try
|
||||
:column
|
||||
{
|
||||
*d.columns.at(column_name)
|
||||
}
|
||||
{
|
||||
}
|
||||
catch(const std::out_of_range &e)
|
||||
{
|
||||
// Log which database and which column name failed; the exception then
// propagates to the caller (implicit rethrow at the end of this handler).
log.error("'%s' failed to open non-existent column '%s'",
|
||||
d.name,
|
||||
column_name);
|
||||
}
|
||||
|
|
|
@ -75,6 +75,9 @@ struct database
|
|||
database(database &&) = delete;
|
||||
database(const database &) = delete;
|
||||
~database() noexcept;
|
||||
|
||||
static const database &get(const column &);
|
||||
static database &get(column &);
|
||||
};
|
||||
|
||||
// options <-> string
|
||||
|
|
|
@ -37,6 +37,37 @@ template<class T> using optlist = std::initializer_list<optval<T>>;
|
|||
template<class T> bool has_opt(const optlist<T> &, const T &);
|
||||
template<class T> ssize_t opt_val(const optlist<T> &, const T &);
|
||||
|
||||
// Option flags for [SET] (write) operations.
enum class set
|
||||
{
|
||||
FSYNC, // Uses kernel filesystem synchronization after write (slow)
|
||||
NO_JOURNAL, // NOTE(review): name suggests this skips the Write Ahead Log (WAL), trading away some crash recovery -- confirm
|
||||
MISSING_COLUMNS // No exception thrown when writing to a deleted column family
|
||||
};
|
||||
|
||||
// Option list for [SET] operations: an optlist of `set` values.
struct sopts
|
||||
:optlist<set>
|
||||
{
|
||||
// Forwards every argument into a brace-initialization of the optlist<set> base.
// NOTE(review): if optlist is a std::initializer_list, it only refers to its
// backing array and that array is a temporary created by this brace-init;
// confirm nothing reads the elements after the constructor has returned.
template<class... list> sopts(list&&... l): optlist<set>{std::forward<list>(l)...} {}
|
||||
};
|
||||
|
||||
// Option flags for [GET] (read / iteration) operations.
enum class get
|
||||
{
|
||||
PIN, // Keep iter data in memory for iter lifetime (good for lots of ++/--)
|
||||
CACHE, // Update the cache (CACHE is default for non-iterator operations)
|
||||
NO_CACHE, // Do not update the cache (NO_CACHE is default for iterators)
|
||||
NO_SNAPSHOT, // This iterator will have the latest data (tailing)
|
||||
NO_CHECKSUM, // Integrity of data will be checked unless this is specified
|
||||
READAHEAD, // Pair with a size in bytes for prefetching additional data
|
||||
};
|
||||
|
||||
// Option list for [GET] operations: an optlist of `get` values plus an
// optional snapshot.
struct gopts
|
||||
:optlist<get>
|
||||
{
|
||||
// NOTE(review): presumably the snapshot a read is served from when set -- confirm against make_opts().
database::snapshot snapshot;
|
||||
|
||||
// Forwards every argument into a brace-initialization of the optlist<get> base.
// NOTE(review): if optlist is a std::initializer_list, it only refers to its
// backing array and that array is a temporary created by this brace-init;
// confirm nothing reads the elements after the constructor has returned.
template<class... list> gopts(list&&... l): optlist<get>{std::forward<list>(l)...} {}
|
||||
};
|
||||
|
||||
} // namespace db
|
||||
} // namespace ircd
|
||||
|
||||
|
|
143
ircd/db.cc
143
ircd/db.cc
|
@ -181,7 +181,8 @@ struct database::comparator
|
|||
};
|
||||
|
||||
struct database::column
|
||||
:rocksdb::ColumnFamilyDescriptor
|
||||
:std::enable_shared_from_this<database::column>
|
||||
,rocksdb::ColumnFamilyDescriptor
|
||||
{
|
||||
database *d;
|
||||
std::type_index key_type;
|
||||
|
@ -405,6 +406,20 @@ const
|
|||
return *columns.at(name);
|
||||
}
|
||||
|
||||
// Returns the database referred to by the column's `d` pointer (mutable overload).
// The pointer is only checked with assert(), so a null `d` is dereferenced
// unchecked in builds where assertions are compiled out.
ircd::db::database &
|
||||
ircd::db::database::get(column &column)
|
||||
{
|
||||
assert(column.d);
|
||||
return *column.d;
|
||||
}
|
||||
|
||||
// Returns the database referred to by the column's `d` pointer (const overload).
// The pointer is only checked with assert(), so a null `d` is dereferenced
// unchecked in builds where assertions are compiled out.
const ircd::db::database &
|
||||
ircd::db::database::get(const column &column)
|
||||
{
|
||||
assert(column.d);
|
||||
return *column.d;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// database::comparator
|
||||
|
@ -1069,45 +1084,12 @@ ircd::db::name(const column &column)
|
|||
// column
|
||||
//
|
||||
|
||||
ircd::db::column::column(database &d,
|
||||
const string_view &column_name)
|
||||
try
|
||||
:column
|
||||
{
|
||||
d, *d.columns.at(column_name)
|
||||
}
|
||||
{
|
||||
}
|
||||
catch(const std::out_of_range &e)
|
||||
{
|
||||
log.error("'%s' failed to open non-existent column '%s'",
|
||||
d.name,
|
||||
column_name);
|
||||
}
|
||||
|
||||
ircd::db::column::column(database::column &c)
|
||||
:column{*c.d,c}
|
||||
{
|
||||
}
|
||||
|
||||
ircd::db::column::column(database &d,
|
||||
database::column &c)
|
||||
:column{shared_from(d), c}
|
||||
{
|
||||
}
|
||||
|
||||
ircd::db::column::column(std::shared_ptr<database> d,
|
||||
database::column &c)
|
||||
:d{std::move(d)}
|
||||
,c{&c}
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
ircd::db::column::flush(const bool &blocking)
|
||||
ircd::db::flush(column &column,
|
||||
const bool &blocking)
|
||||
{
|
||||
database &d(*this);
|
||||
database::column &c(*this);
|
||||
database &d(column);
|
||||
database::column &c(column);
|
||||
|
||||
rocksdb::FlushOptions opts;
|
||||
opts.wait = blocking;
|
||||
|
@ -1175,6 +1157,49 @@ ircd::db::append(rocksdb::WriteBatch &batch,
|
|||
}
|
||||
}
|
||||
|
||||
// [GET] Tests whether `key` exists in `column`.
//
// Returns true when the lookup status is kOk and false when it is kNotFound
// (or when KeyMayExist() already rules the key out). Any other status is
// handed to throw_on_error().
//
// The lookup is attempted in the NON_BLOCKING read tier first; only when that
// attempt reports Incomplete is a second Get() issued in the BLOCKING tier via
// ctx::offload().
bool
|
||||
ircd::db::has(column &column,
|
||||
const string_view &key,
|
||||
const gopts &gopts)
|
||||
{
|
||||
// Obtain the database and the underlying database::column from the handle
// (both are initialized from `column` via conversion).
database &d(column);
|
||||
database::column &c(column);
|
||||
|
||||
const auto k(slice(key));
|
||||
auto opts(make_opts(gopts));
|
||||
|
||||
// Start in the non-blocking read tier: per the original note, these queries
// must not enter the kernel (i.e. must not wait on I/O).
opts.read_tier = NON_BLOCKING;
|
||||
|
||||
// Cheap pre-check: KeyMayExist() can yield false positives but a `false`
// answer means the key is definitely absent, so we can return right away.
if(!d.d->KeyMayExist(opts, c, k, nullptr, nullptr))
|
||||
return false;
|
||||
|
||||
// Try to answer from the cache; no value buffer is passed (nullptr) since
// only the status is of interest here.
auto status(d.d->Get(opts, c, k, nullptr));
|
||||
if(status.IsIncomplete())
|
||||
{
|
||||
// DB cache miss; next query requires I/O, offload it.
// The closure captures locals by reference and writes `status`, which is
// read right after the call -- NOTE(review): this relies on ctx::offload()
// not returning until the closure has finished; confirm.
opts.read_tier = BLOCKING;
|
||||
ctx::offload([&d, &c, &k, &opts, &status]
|
||||
{
|
||||
status = d.d->Get(opts, c, k, nullptr);
|
||||
});
|
||||
}
|
||||
|
||||
// Map the final status onto the boolean result
switch(status.code())
|
||||
{
|
||||
using rocksdb::Status;
|
||||
|
||||
case Status::kOk: return true;
|
||||
case Status::kNotFound: return false;
|
||||
default:
|
||||
// Any other status is treated as an error. NOTE(review): the unreachable
// marker below is undefined behaviour if throw_on_error() ever returns
// for a status reaching this branch -- confirm it always throws here.
throw_on_error(status);
|
||||
__builtin_unreachable();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ircd::db::column::operator()(const op &op,
|
||||
const string_view &key,
|
||||
|
@ -1229,48 +1254,6 @@ ircd::db::column::operator()(const string_view &key,
|
|||
func(string_view{v.data(), v.size()});
|
||||
}
|
||||
|
||||
bool
|
||||
ircd::db::column::has(const string_view &key,
|
||||
const gopts &gopts)
|
||||
{
|
||||
database &d(*this);
|
||||
database::column &c(*this);
|
||||
|
||||
const auto k(slice(key));
|
||||
auto opts(make_opts(gopts));
|
||||
|
||||
// Perform queries which are stymied from any sysentry
|
||||
opts.read_tier = NON_BLOCKING;
|
||||
|
||||
// Perform a co-RP query to the filtration
|
||||
if(!d.d->KeyMayExist(opts, c, k, nullptr, nullptr))
|
||||
return false;
|
||||
|
||||
// Perform a query to the cache
|
||||
auto status(d.d->Get(opts, c, k, nullptr));
|
||||
if(status.IsIncomplete())
|
||||
{
|
||||
// DB cache miss; next query requires I/O, offload it
|
||||
opts.read_tier = BLOCKING;
|
||||
ctx::offload([&d, &c, &k, &opts, &status]
|
||||
{
|
||||
status = d.d->Get(opts, c, k, nullptr);
|
||||
});
|
||||
}
|
||||
|
||||
// Finally the result
|
||||
switch(status.code())
|
||||
{
|
||||
using rocksdb::Status;
|
||||
|
||||
case Status::kOk: return true;
|
||||
case Status::kNotFound: return false;
|
||||
default:
|
||||
throw_on_error(status);
|
||||
__builtin_unreachable();
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// column::const_iterator
|
||||
|
|
Loading…
Reference in a new issue