0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2025-01-14 00:34:18 +01:00

fixup! ircd::db: Develop object-store out of db system.

This commit is contained in:
Jason Volk 2017-03-30 16:20:01 -07:00
parent b07169dd69
commit a2625b9f4b
5 changed files with 141 additions and 140 deletions

View file

@ -1,10 +1,10 @@
IRCd Database
## IRCd Database
IRCd's database is presented here primarily as a persistent Object store.
In other words, the structure presented by the database can be represented
with JSON. This is built from the primitives of `column`s, `row`s and `cell`s.
with JSON. This is built from the primitives of `column`, `row` and `cell`.
Columns:
#### Columns
While a simple key-value store could naively store a JSON document as a textual
value, we provide additional structure schematized before opening a database:
Every member of a JSON object is a `column` in this database. To address members
@ -12,14 +12,14 @@ within nested objects, we specify a `column` with a "foo.bar.baz" path syntax. T
puts all columns at the same level in our code, even though they may represent
deeply nested values.
Rows:
#### Rows
Since `columns` are technically independent key-value stores (they have their own
index), when an index key is the same between columns we call this a `row`. For basic
object storage the schema is such that we use the same keys between all columns. For
example, an index would be a username in a user database. The user database itself
takes the form of a single JSON object and any member lookup happens on a user's row.
Cells:
#### Cells
A `cell` is a single value in a `column` indexed by a key that should be able to form
a `row` between columns. Consider the following near-json expression:
@ -29,6 +29,7 @@ In the users database, we find the `column` "password" and the `row` for "root"
set that `cell` to "foobar"
Consider these expressions for objects at some depth:
users["root"] = {"password.plaintext", "foobar"};
users["root"] = {"password", {"plaintext", "foobar"}};
@ -36,7 +37,7 @@ The column is always found as "password.plaintext". We find it (and can iterate
if it were an object) by string-manipulating these full paths which all sit in a single map
and are always open, even if the cell is empty for some row.
Important notes:
### Important notes
!!!
The database system is plugged into the userspace context system to facilitate IO. This means

View file

@ -23,51 +23,18 @@
#pragma once
#define HAVE_IRCD_DB_COLUMN_H
namespace ircd {
namespace db {
// Columns add the ability to run multiple LevelDB's in synchrony under the same database
// (directory). Each column is a fully distinct key/value store; they are merely joined
// for consistency.
//
// [GET] may be posted to a separate thread which incurs the time of IO while the calling
// ircd::context yields.
//
// [SET] usually occurs without yielding your context because the DB is write-log oriented.
//
namespace ircd {
namespace db {
// Option flags for [SET] (write) operations; sopts is a list of these.
enum class set
{
FSYNC, // Uses kernel filesystem synchronization after write (slow)
NO_JOURNAL, // NOTE(review): name suggests this skips the Write Ahead Log (WAL) used for some crash recovery -- confirm
MISSING_COLUMNS // No exception thrown when writing to a deleted column family
};
// List of `set` options given to write operations. The variadic constructor
// forwards all of its arguments into a braced initializer for the optlist<set> base.
// NOTE(review): if optlist is a std::initializer_list alias, the backing array of a
// braced list created in a mem-initializer may not outlive the constructor -- verify.
// NOTE(review): the unconstrained forwarding constructor may be preferred over the
// copy constructor for non-const lvalue sopts arguments -- verify this is intended.
struct sopts
:optlist<set>
{
template<class... list> sopts(list&&... l): optlist<set>{std::forward<list>(l)...} {}
};
// Option flags for [GET] (read and iteration) operations; gopts is a list of these.
enum class get
{
PIN, // Keep iter data in memory for iter lifetime (good for lots of ++/--)
CACHE, // Update the cache (CACHE is default for non-iterator operations)
NO_CACHE, // Do not update the cache (NO_CACHE is default for iterators)
NO_SNAPSHOT, // This iterator will have the latest data (tailing)
NO_CHECKSUM, // Integrity of data will be checked unless this is specified
READAHEAD, // Pair with a size in bytes for prefetching additional data
};
// List of `get` options given to read operations, plus a database::snapshot member.
// The variadic constructor forwards all of its arguments into a braced initializer
// for the optlist<get> base; `snapshot` is left default-initialized by it.
// NOTE(review): if optlist is a std::initializer_list alias, the backing array of a
// braced list created in a mem-initializer may not outlive the constructor -- verify.
struct gopts
:optlist<get>
{
// Snapshot carried alongside the options; how it is consumed is not shown here.
database::snapshot snapshot;
template<class... list> gopts(list&&... l): optlist<get>{std::forward<list>(l)...} {}
};
// Columns add the ability to run multiple LevelDB's in synchrony under the same database
// (directory). Each column is a fully distinct key/value store; they are merely joined
// for consistency.
//
struct column
{
struct const_iterator;
@ -82,19 +49,17 @@ struct column
protected:
using ColumnFamilyHandle = rocksdb::ColumnFamilyHandle;
std::shared_ptr<database> d;
database::column *c;
public:
operator const database &() const { return *d; }
operator const database &() const { return database::get(*c); }
operator const database::column &() const { return *c; }
operator std::shared_ptr<database>() const { return d; }
operator database &() { return *d; }
operator database &() { return database::get(*c); }
operator database::column &() { return *c; }
operator bool() const { return bool(d); }
bool operator!() const { return !d; }
operator bool() const { return bool(c); }
bool operator!() const { return !c; }
// [GET] Iterations
const_iterator cbegin(const gopts & = {});
@ -105,9 +70,6 @@ struct column
const_iterator lower_bound(const string_view &key, const gopts & = {});
const_iterator upper_bound(const string_view &key, const gopts & = {});
// [GET] Tests if key exists
bool has(const string_view &key, const gopts & = {});
// [GET] Perform a get into a closure. This offers a reference to the data with zero-copy.
void operator()(const string_view &key, const view_closure &func, const gopts & = {});
void operator()(const string_view &key, const gopts &, const view_closure &func);
@ -117,13 +79,11 @@ struct column
void operator()(const std::initializer_list<delta> &, const sopts & = {});
void operator()(const op &, const string_view &key, const string_view &val = {}, const sopts & = {});
// Flush memory tables to disk (this column only).
void flush(const bool &blocking = false);
column(std::shared_ptr<database>, database::column &);
column(database &, database::column &);
column(database &, const string_view &column);
column(database::column &);
column(database::column &c)
:c{&c}
{}
column() = default;
};
@ -138,6 +98,9 @@ const std::string &name(const column &);
size_t file_count(column &);
size_t bytes(column &);
// [GET] Tests if key exists
bool has(column &, const string_view &key, const gopts & = {});
// [GET] Convenience functions to copy data into your buffer.
// The signed char buffer is null terminated; the unsigned is not.
size_t read(column &, const string_view &key, uint8_t *const &buf, const size_t &max, const gopts & = {});
@ -151,5 +114,25 @@ void write(column &, const string_view &key, const uint8_t *const &buf, const si
// [SET] Remove data from the db. not_found is never thrown.
void del(column &, const string_view &key, const sopts & = {});
// [SET] Flush memory tables to disk (this column only).
void flush(column &, const bool &blocking = false);
} // namespace db
} // namespace ircd
// Construct a column handle by name. Looks up `column_name` in d.columns with at()
// and delegates to the single-argument constructor with the dereferenced result.
// When at() throws std::out_of_range the failure is logged; since this is a
// constructor function-try-block, the exception is rethrown once the handler ends.
inline
ircd::db::column::column(database &d,
const string_view &column_name)
try
:column
{
*d.columns.at(column_name)
}
{
}
catch(const std::out_of_range &e)
{
// Logging only; control cannot leave a constructor's handler normally.
log.error("'%s' failed to open non-existent column '%s'",
d.name,
column_name);
}

View file

@ -75,6 +75,9 @@ struct database
database(database &&) = delete;
database(const database &) = delete;
~database() noexcept;
static const database &get(const column &);
static database &get(column &);
};
// options <-> string

View file

@ -37,6 +37,37 @@ template<class T> using optlist = std::initializer_list<optval<T>>;
template<class T> bool has_opt(const optlist<T> &, const T &);
template<class T> ssize_t opt_val(const optlist<T> &, const T &);
// Option flags for [SET] (write) operations; sopts is a list of these.
enum class set
{
FSYNC, // Uses kernel filesystem synchronization after write (slow)
NO_JOURNAL, // NOTE(review): name suggests this skips the Write Ahead Log (WAL) used for some crash recovery -- confirm
MISSING_COLUMNS // No exception thrown when writing to a deleted column family
};
// List of `set` options given to write operations. The variadic constructor
// forwards all of its arguments into a braced initializer for the optlist<set> base.
// NOTE(review): optlist is a std::initializer_list alias; the backing array of a
// braced list created in a mem-initializer may not outlive the constructor -- verify.
// NOTE(review): the unconstrained forwarding constructor may be preferred over the
// copy constructor for non-const lvalue sopts arguments -- verify this is intended.
struct sopts
:optlist<set>
{
template<class... list> sopts(list&&... l): optlist<set>{std::forward<list>(l)...} {}
};
// Option flags for [GET] (read and iteration) operations; gopts is a list of these.
enum class get
{
PIN, // Keep iter data in memory for iter lifetime (good for lots of ++/--)
CACHE, // Update the cache (CACHE is default for non-iterator operations)
NO_CACHE, // Do not update the cache (NO_CACHE is default for iterators)
NO_SNAPSHOT, // This iterator will have the latest data (tailing)
NO_CHECKSUM, // Integrity of data will be checked unless this is specified
READAHEAD, // Pair with a size in bytes for prefetching additional data
};
// List of `get` options given to read operations, plus a database::snapshot member.
// The variadic constructor forwards all of its arguments into a braced initializer
// for the optlist<get> base; `snapshot` is left default-initialized by it.
// NOTE(review): optlist is a std::initializer_list alias; the backing array of a
// braced list created in a mem-initializer may not outlive the constructor -- verify.
struct gopts
:optlist<get>
{
// Snapshot carried alongside the options; how it is consumed is not shown here.
database::snapshot snapshot;
template<class... list> gopts(list&&... l): optlist<get>{std::forward<list>(l)...} {}
};
} // namespace db
} // namespace ircd

View file

@ -181,7 +181,8 @@ struct database::comparator
};
struct database::column
:rocksdb::ColumnFamilyDescriptor
:std::enable_shared_from_this<database::column>
,rocksdb::ColumnFamilyDescriptor
{
database *d;
std::type_index key_type;
@ -405,6 +406,20 @@ const
return *columns.at(name);
}
// Returns the database which owns the given column, via the column's back-pointer `d`.
// The back-pointer is asserted to be non-null (debug builds only).
ircd::db::database &
ircd::db::database::get(column &column)
{
	database *const owner{column.d};
	assert(owner);
	return *owner;
}
// Const overload: returns the database which owns the given column, via the
// column's back-pointer `d`. The back-pointer is asserted non-null (debug builds only).
const ircd::db::database &
ircd::db::database::get(const column &column)
{
	const database *const owner{column.d};
	assert(owner);
	return *owner;
}
///////////////////////////////////////////////////////////////////////////////
//
// database::comparator
@ -1069,45 +1084,12 @@ ircd::db::name(const column &column)
// column
//
// Construct a column handle by name. Looks up `column_name` in d.columns with at()
// and delegates to the (database, database::column) constructor.
// When at() throws std::out_of_range the failure is logged; since this is a
// constructor function-try-block, the exception is rethrown once the handler ends.
ircd::db::column::column(database &d,
const string_view &column_name)
try
:column
{
d, *d.columns.at(column_name)
}
{
}
catch(const std::out_of_range &e)
{
// Logging only; control cannot leave a constructor's handler normally.
log.error("'%s' failed to open non-existent column '%s'",
d.name,
column_name);
}
// Construct from an internal column alone; the owning database is taken from
// the column's back-pointer `c.d` (dereferenced without a null check here).
ircd::db::column::column(database::column &c)
:column{*c.d,c}
{
}
// Construct from a database reference and an internal column; delegates with
// the result of shared_from(d).
// NOTE(review): shared_from presumably yields a std::shared_ptr<database> -- confirm.
ircd::db::column::column(database &d,
database::column &c)
:column{shared_from(d), c}
{
}
// Target of the delegating constructors: takes over the shared database pointer
// (moved into the member `d`) and stores the address of the internal column in `c`.
ircd::db::column::column(std::shared_ptr<database> d,
database::column &c)
:d{std::move(d)}
,c{&c}
{
}
void
ircd::db::column::flush(const bool &blocking)
ircd::db::flush(column &column,
const bool &blocking)
{
database &d(*this);
database::column &c(*this);
database &d(column);
database::column &c(column);
rocksdb::FlushOptions opts;
opts.wait = blocking;
@ -1175,6 +1157,49 @@ ircd::db::append(rocksdb::WriteBatch &batch,
}
}
// [GET] Tests whether `key` exists in `column`.
// The first pass is limited to the non-blocking read tier: a KeyMayExist() check
// followed by a Get(). Only when that Get() reports Incomplete is a second Get()
// issued on the blocking tier, through ctx::offload.
// Returns true for kOk and false for kNotFound (or a negative KeyMayExist());
// any other status is handed to throw_on_error().
bool
ircd::db::has(column &column,
              const string_view &key,
              const gopts &gopts)
{
	database &db(column);
	database::column &cf(column);
	const auto target(slice(key));
	auto ropts(make_opts(gopts));

	// Restrict the first pass to the non-blocking read tier.
	ropts.read_tier = NON_BLOCKING;

	// A negative answer from the filter check is final.
	const bool maybe(db.d->KeyMayExist(ropts, cf, target, nullptr, nullptr));
	if(!maybe)
		return false;

	// Non-blocking lookup; Incomplete means the answer needs the blocking tier.
	auto result(db.d->Get(ropts, cf, target, nullptr));
	if(result.IsIncomplete())
	{
		// DB cache miss; the repeat query requires I/O so it is offloaded.
		ropts.read_tier = BLOCKING;
		ctx::offload([&]
		{
			result = db.d->Get(ropts, cf, target, nullptr);
		});
	}

	// Map the final status onto the boolean result.
	const auto code(result.code());
	if(code == rocksdb::Status::kOk)
		return true;

	if(code == rocksdb::Status::kNotFound)
		return false;

	throw_on_error(result);
	__builtin_unreachable();
}
void
ircd::db::column::operator()(const op &op,
const string_view &key,
@ -1229,48 +1254,6 @@ ircd::db::column::operator()(const string_view &key,
func(string_view{v.data(), v.size()});
}
bool
ircd::db::column::has(const string_view &key,
const gopts &gopts)
{
database &d(*this);
database::column &c(*this);
const auto k(slice(key));
auto opts(make_opts(gopts));
// Perform queries which are stymied from any sysentry
opts.read_tier = NON_BLOCKING;
// Perform a co-RP query to the filtration
if(!d.d->KeyMayExist(opts, c, k, nullptr, nullptr))
return false;
// Perform a query to the cache
auto status(d.d->Get(opts, c, k, nullptr));
if(status.IsIncomplete())
{
// DB cache miss; next query requires I/O, offload it
opts.read_tier = BLOCKING;
ctx::offload([&d, &c, &k, &opts, &status]
{
status = d.d->Get(opts, c, k, nullptr);
});
}
// Finally the result
switch(status.code())
{
using rocksdb::Status;
case Status::kOk: return true;
case Status::kNotFound: return false;
default:
throw_on_error(status);
__builtin_unreachable();
}
}
///////////////////////////////////////////////////////////////////////////////
//
// column::const_iterator