2003-10-14 18:33:00 +03:00
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <fcntl.h>
|
2003-05-26 16:45:00 +03:00
|
|
|
|
2003-05-26 18:09:10 +03:00
|
|
|
#include <memory>
|
|
|
|
|
2003-10-14 18:33:00 +03:00
|
|
|
#include "db.hh"
|
|
|
|
#include "util.hh"
|
|
|
|
#include "pathlocks.hh"
|
|
|
|
|
2003-05-26 16:45:00 +03:00
|
|
|
|
2003-07-31 16:47:13 +03:00
|
|
|
/* Wrapper class to ensure proper destruction. */
|
|
|
|
class DestroyDbc
|
2003-05-26 16:45:00 +03:00
|
|
|
{
|
2003-07-31 16:47:13 +03:00
|
|
|
Dbc * dbc;
|
2003-05-26 16:45:00 +03:00
|
|
|
public:
|
2003-07-31 16:47:13 +03:00
|
|
|
DestroyDbc(Dbc * _dbc) : dbc(_dbc) { }
|
|
|
|
~DestroyDbc() { dbc->close(); /* close() frees dbc */ }
|
2003-05-26 16:45:00 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2003-07-31 16:47:13 +03:00
|
|
|
/* Convert a Berkeley DB exception into an Error carrying the same
   message.  Never returns normally.

   The message is supplied as a format *argument* rather than as the
   format string itself, so any `%' characters it contains are
   reproduced literally however Error interprets its argument. */
static void rethrow(DbException & e)
{
    throw Error(format("%1%") % e.what());
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Construct a null transaction, i.e. one that has no underlying
   DbTxn attached. */
Transaction::Transaction()
{
    txn = 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Begin a new transaction in the environment of `db'.  Throws Error
   if the environment is not open (via requireEnv()) or if Berkeley
   DB fails to start the transaction. */
Transaction::Transaction(Database & db)
    : txn(0) /* never leave the handle indeterminate */
{
    db.requireEnv();
    try {
        db.env->txn_begin(0, &txn, 0);
    } catch (DbException & e) { rethrow(e); }
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Abort the transaction if it is still active, i.e. if it was
   neither committed, aborted nor moved elsewhere. */
Transaction::~Transaction()
{
    if (!txn) return;

    try {
        abort();
    } catch (...) {
        /* A destructor must not let an exception escape: if we are
           being destroyed during stack unwinding that would
           terminate the program.  abort() has already cleared
           `txn', so there is nothing more to undo here. */
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
void Transaction::commit()
|
|
|
|
{
|
|
|
|
if (!txn) throw Error("commit called on null transaction");
|
2003-07-31 19:05:35 +03:00
|
|
|
debug(format("committing transaction %1%") % (void *) txn);
|
|
|
|
DbTxn * txn2 = txn;
|
2003-07-31 16:47:13 +03:00
|
|
|
txn = 0;
|
2003-07-31 19:05:35 +03:00
|
|
|
try {
|
|
|
|
txn2->commit(0);
|
|
|
|
} catch (DbException e) { rethrow(e); }
|
2003-07-31 16:47:13 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2003-07-31 19:05:35 +03:00
|
|
|
void Transaction::abort()
|
2003-07-31 16:47:13 +03:00
|
|
|
{
|
2003-07-31 19:05:35 +03:00
|
|
|
if (!txn) throw Error("abort called on null transaction");
|
|
|
|
debug(format("aborting transaction %1%") % (void *) txn);
|
|
|
|
DbTxn * txn2 = txn;
|
|
|
|
txn = 0;
|
|
|
|
try {
|
|
|
|
txn2->abort();
|
|
|
|
} catch (DbException e) { rethrow(e); }
|
2003-07-31 16:47:13 +03:00
|
|
|
}
|
2003-05-26 16:45:00 +03:00
|
|
|
|
2003-07-31 16:47:13 +03:00
|
|
|
|
2003-10-15 15:42:39 +03:00
|
|
|
/* Hand the underlying DbTxn over to `t', leaving this object a null
   transaction.  Throws Error if `t' already holds a transaction. */
void Transaction::moveTo(Transaction & t)
{
    if (t.txn != 0)
        throw Error("target txn already exists");

    DbTxn * handle = txn;
    txn = 0;
    t.txn = handle;
}
|
|
|
|
|
|
|
|
|
2003-07-31 19:05:35 +03:00
|
|
|
void Database::requireEnv()
|
2003-07-31 16:47:13 +03:00
|
|
|
{
|
2003-07-31 19:05:35 +03:00
|
|
|
if (!env) throw Error("database environment not open");
|
|
|
|
}
|
2003-07-31 16:47:13 +03:00
|
|
|
|
2003-05-26 16:45:00 +03:00
|
|
|
|
2003-07-31 19:05:35 +03:00
|
|
|
/* Look up the Db handle registered under `table'.  Throws Error if
   no table with that id is registered. */
Db * Database::getDb(TableId table)
{
    map<TableId, Db *>::iterator pos = tables.find(table);
    if (pos != tables.end())
        return pos->second;
    throw Error("unknown table id");
}
|
|
|
|
|
|
|
|
|
2003-07-31 16:47:13 +03:00
|
|
|
/* Construct a closed database: no environment, no open descriptors,
   and table ids starting at 1. */
Database::Database()
    : env(0)
    , nextId(1)
{
    /* Give the descriptors a well-defined "not open" value instead
       of leaving them indeterminate until open() is called. */
    fdAccessors = -1;
    fdLock = -1;
}
|
|
|
|
|
|
|
|
|
2003-07-31 16:47:13 +03:00
|
|
|
/* Close the database environment if it is still open. */
Database::~Database()
{
    try {
        close();
    } catch (...) {
        /* close() can throw, but a destructor must not let an
           exception escape: during stack unwinding that would
           terminate the program.  Callers that need to observe
           close failures should call close() explicitly. */
    }
}
|
2003-07-31 19:05:35 +03:00
|
|
|
|
|
|
|
|
2003-10-14 18:33:00 +03:00
|
|
|
int getAccessorCount(int fd)
|
|
|
|
{
|
|
|
|
if (lseek(fd, 0, SEEK_SET) == -1)
|
|
|
|
throw SysError("seeking accessor count");
|
|
|
|
char buf[128];
|
|
|
|
int len;
|
|
|
|
if ((len = read(fd, buf, sizeof(buf) - 1)) == -1)
|
|
|
|
throw SysError("reading accessor count");
|
|
|
|
buf[len] = 0;
|
|
|
|
int count;
|
|
|
|
if (sscanf(buf, "%d", &count) != 1) {
|
|
|
|
debug(format("accessor count is invalid: `%1%'") % buf);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
return count;
|
|
|
|
}
|
2003-07-31 19:05:35 +03:00
|
|
|
|
|
|
|
|
2003-10-14 18:33:00 +03:00
|
|
|
void setAccessorCount(int fd, int n)
|
|
|
|
{
|
|
|
|
if (lseek(fd, 0, SEEK_SET) == -1)
|
|
|
|
throw SysError("seeking accessor count");
|
|
|
|
string s = (format("%1%") % n).str();
|
|
|
|
const char * s2 = s.c_str();
|
|
|
|
if (write(fd, s2, strlen(s2)) != (ssize_t) strlen(s2) ||
|
|
|
|
ftruncate(fd, strlen(s2)) != 0)
|
|
|
|
throw SysError("writing accessor count");
|
|
|
|
}
|
2003-07-31 19:05:35 +03:00
|
|
|
|
2003-10-14 18:33:00 +03:00
|
|
|
|
|
|
|
/* Open the environment `env' in directory `path' with the locking,
   logging, memory pool and transaction subsystems enabled, creating
   it if necessary.  `flags' are OR'ed into the open flags (e.g.
   DB_RECOVER). */
void openEnv(DbEnv * env, const string & path, u_int32_t flags)
{
    u_int32_t openFlags =
        DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN |
        DB_CREATE;
    openFlags |= flags;

    env->open(path.c_str(), openFlags, 0666);
}
|
|
|
|
|
|
|
|
|
|
|
|
void Database::open(const string & path)
|
|
|
|
{
|
2003-10-14 18:33:00 +03:00
|
|
|
if (env) throw Error(format("environment already open"));
|
|
|
|
|
2003-07-31 16:47:13 +03:00
|
|
|
try {
|
|
|
|
|
2003-10-14 18:33:00 +03:00
|
|
|
debug(format("opening database environment"));
|
|
|
|
|
|
|
|
|
|
|
|
/* Create the database environment object. */
|
2003-07-31 16:47:13 +03:00
|
|
|
env = new DbEnv(0);
|
|
|
|
|
2003-07-31 17:28:49 +03:00
|
|
|
env->set_lg_bsize(32 * 1024); /* default */
|
|
|
|
env->set_lg_max(256 * 1024); /* must be > 4 * lg_bsize */
|
2003-07-31 19:05:35 +03:00
|
|
|
env->set_lk_detect(DB_LOCK_DEFAULT);
|
2003-10-16 11:52:44 +03:00
|
|
|
env->set_flags(DB_TXN_WRITE_NOSYNC, 1);
|
2003-07-31 17:28:49 +03:00
|
|
|
|
2003-10-14 18:33:00 +03:00
|
|
|
|
|
|
|
/* The following code provides automatic recovery of the
|
|
|
|
database environment. Recovery is necessary when a process
|
|
|
|
dies while it has the database open. To detect this,
|
|
|
|
processes atomically increment a counter when the open the
|
|
|
|
database, and decrement it when they close it. If we see
|
|
|
|
that counter is > 0 but no processes are accessing the
|
|
|
|
database---determined by attempting to obtain a write lock
|
|
|
|
on a lock file on which all accessors have a read lock---we
|
|
|
|
must run recovery. Note that this also ensures that we
|
|
|
|
only run recovery when there are no other accessors (which
|
|
|
|
could cause database corruption). */
|
|
|
|
|
|
|
|
/* !!! close fdAccessors / fdLock on exception */
|
|
|
|
|
|
|
|
/* Open the accessor count file. */
|
|
|
|
string accessorsPath = path + "/accessor_count";
|
|
|
|
fdAccessors = ::open(accessorsPath.c_str(), O_RDWR | O_CREAT, 0666);
|
|
|
|
if (fdAccessors == -1)
|
|
|
|
throw SysError(format("opening file `%1%'") % accessorsPath);
|
|
|
|
|
|
|
|
/* Open the lock file. */
|
|
|
|
string lockPath = path + "/access_lock";
|
|
|
|
fdLock = ::open(lockPath.c_str(), O_RDWR | O_CREAT, 0666);
|
|
|
|
if (fdLock == -1)
|
|
|
|
throw SysError(format("opening lock file `%1%'") % lockPath);
|
|
|
|
|
|
|
|
/* Try to acquire a write lock. */
|
|
|
|
debug(format("attempting write lock on `%1%'") % lockPath);
|
|
|
|
if (lockFile(fdLock, ltWrite, false)) { /* don't wait */
|
|
|
|
|
|
|
|
debug(format("write lock granted"));
|
|
|
|
|
|
|
|
/* We have a write lock, which means that there are no
|
|
|
|
other readers or writers. */
|
|
|
|
|
|
|
|
int n = getAccessorCount(fdAccessors);
|
|
|
|
setAccessorCount(fdAccessors, 1);
|
|
|
|
|
|
|
|
if (n != 0) {
|
|
|
|
msg(lvlTalkative, format("accessor count is %1%, running recovery") % n);
|
|
|
|
|
|
|
|
/* Open the environment after running recovery. */
|
|
|
|
openEnv(env, path, DB_RECOVER);
|
|
|
|
}
|
|
|
|
|
|
|
|
else
|
|
|
|
/* Open the environment normally. */
|
|
|
|
openEnv(env, path, 0);
|
|
|
|
|
|
|
|
/* Downgrade to a read lock. */
|
|
|
|
debug(format("downgrading to read lock on `%1%'") % lockPath);
|
|
|
|
lockFile(fdLock, ltRead, true);
|
|
|
|
|
|
|
|
} else {
|
|
|
|
/* There are other accessors. */
|
|
|
|
debug(format("write lock refused"));
|
|
|
|
|
|
|
|
/* Acquire a read lock. */
|
|
|
|
debug(format("acquiring read lock on `%1%'") % lockPath);
|
|
|
|
lockFile(fdLock, ltRead, true); /* wait indefinitely */
|
|
|
|
|
|
|
|
/* Increment the accessor count. */
|
|
|
|
lockFile(fdAccessors, ltWrite, true);
|
|
|
|
int n = getAccessorCount(fdAccessors) + 1;
|
|
|
|
setAccessorCount(fdAccessors, n);
|
|
|
|
debug(format("incremented accessor count to %1%") % n);
|
|
|
|
lockFile(fdAccessors, ltNone, true);
|
|
|
|
|
|
|
|
/* Open the environment normally. */
|
|
|
|
openEnv(env, path, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
} catch (DbException e) { rethrow(e); }
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void Database::close()
|
|
|
|
{
|
|
|
|
if (!env) return;
|
|
|
|
|
|
|
|
/* Close the database environment. */
|
|
|
|
debug(format("closing database environment"));
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
for (map<TableId, Db *>::iterator i = tables.begin();
|
|
|
|
i != tables.end(); i++)
|
|
|
|
{
|
|
|
|
debug(format("closing table %1%") % i->first);
|
|
|
|
Db * db = i->second;
|
2003-10-16 11:52:44 +03:00
|
|
|
db->close(DB_NOSYNC);
|
2003-10-14 18:33:00 +03:00
|
|
|
delete db;
|
|
|
|
}
|
|
|
|
|
2003-10-16 11:52:44 +03:00
|
|
|
// env->txn_checkpoint(0, 0, 0);
|
2003-10-14 18:33:00 +03:00
|
|
|
env->close(0);
|
2003-07-31 17:28:49 +03:00
|
|
|
|
2003-07-31 16:47:13 +03:00
|
|
|
} catch (DbException e) { rethrow(e); }
|
2003-10-14 18:33:00 +03:00
|
|
|
|
|
|
|
delete env;
|
|
|
|
|
|
|
|
/* Decrement the accessor count. */
|
|
|
|
lockFile(fdAccessors, ltWrite, true);
|
|
|
|
int n = getAccessorCount(fdAccessors) - 1;
|
|
|
|
setAccessorCount(fdAccessors, n);
|
|
|
|
debug(format("decremented accessor count to %1%") % n);
|
|
|
|
lockFile(fdAccessors, ltNone, true);
|
|
|
|
|
|
|
|
::close(fdAccessors);
|
|
|
|
::close(fdLock);
|
2003-07-31 16:47:13 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2003-07-31 19:05:35 +03:00
|
|
|
/* Open (creating it if necessary) the hash table named `tableName'
   in the environment and register it under a freshly allocated
   table id, which is returned.  Throws Error if the environment is
   not open or if Berkeley DB reports a failure. */
TableId Database::openTable(const string & tableName)
{
    requireEnv();
    TableId table = nextId++;

    try {

        Db * db = new Db(env, 0);

        try {
            db->open(0, tableName.c_str(), 0,
                DB_HASH, DB_CREATE | DB_AUTO_COMMIT, 0666);
        } catch (...) {
            /* Don't leak the handle if the table cannot be opened. */
            delete db;
            throw;
        }

        tables[table] = db;

    } catch (DbException & e) { rethrow(e); }

    return table;
}
|
|
|
|
|
|
|
|
|
2003-07-31 19:05:35 +03:00
|
|
|
bool Database::queryString(const Transaction & txn, TableId table,
|
2003-05-26 16:45:00 +03:00
|
|
|
const string & key, string & data)
|
|
|
|
{
|
|
|
|
try {
|
2003-07-31 19:05:35 +03:00
|
|
|
Db * db = getDb(table);
|
2003-05-26 16:45:00 +03:00
|
|
|
|
|
|
|
Dbt kt((void *) key.c_str(), key.length());
|
|
|
|
Dbt dt;
|
|
|
|
|
2003-07-31 16:47:13 +03:00
|
|
|
int err = db->get(txn.txn, &kt, &dt, 0);
|
2003-05-26 16:45:00 +03:00
|
|
|
if (err) return false;
|
|
|
|
|
2003-07-16 23:00:51 +03:00
|
|
|
if (!dt.get_data())
|
|
|
|
data = "";
|
|
|
|
else
|
|
|
|
data = string((char *) dt.get_data(), dt.get_size());
|
2003-05-26 16:45:00 +03:00
|
|
|
|
|
|
|
} catch (DbException e) { rethrow(e); }
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2003-07-31 19:05:35 +03:00
|
|
|
bool Database::queryStrings(const Transaction & txn, TableId table,
|
2003-07-07 12:25:26 +03:00
|
|
|
const string & key, Strings & data)
|
|
|
|
{
|
|
|
|
string d;
|
|
|
|
|
2003-07-31 16:47:13 +03:00
|
|
|
if (!queryString(txn, table, key, d))
|
2003-07-07 12:25:26 +03:00
|
|
|
return false;
|
|
|
|
|
|
|
|
string::iterator it = d.begin();
|
|
|
|
|
|
|
|
while (it != d.end()) {
|
|
|
|
|
|
|
|
if (it + 4 > d.end())
|
|
|
|
throw Error(format("short db entry: `%1%'") % d);
|
|
|
|
|
|
|
|
unsigned int len;
|
|
|
|
len = (unsigned char) *it++;
|
|
|
|
len |= ((unsigned char) *it++) << 8;
|
|
|
|
len |= ((unsigned char) *it++) << 16;
|
|
|
|
len |= ((unsigned char) *it++) << 24;
|
|
|
|
|
|
|
|
if (it + len > d.end())
|
|
|
|
throw Error(format("short db entry: `%1%'") % d);
|
|
|
|
|
|
|
|
string s;
|
|
|
|
while (len--) s += *it++;
|
|
|
|
|
|
|
|
data.push_back(s);
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2003-07-31 19:05:35 +03:00
|
|
|
void Database::setString(const Transaction & txn, TableId table,
|
2003-05-26 16:45:00 +03:00
|
|
|
const string & key, const string & data)
|
|
|
|
{
|
|
|
|
try {
|
2003-07-31 19:05:35 +03:00
|
|
|
Db * db = getDb(table);
|
2003-05-26 16:45:00 +03:00
|
|
|
Dbt kt((void *) key.c_str(), key.length());
|
|
|
|
Dbt dt((void *) data.c_str(), data.length());
|
2003-07-31 16:47:13 +03:00
|
|
|
db->put(txn.txn, &kt, &dt, 0);
|
2003-05-26 16:45:00 +03:00
|
|
|
} catch (DbException e) { rethrow(e); }
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2003-07-31 19:05:35 +03:00
|
|
|
void Database::setStrings(const Transaction & txn, TableId table,
|
2003-07-07 12:25:26 +03:00
|
|
|
const string & key, const Strings & data)
|
|
|
|
{
|
|
|
|
string d;
|
|
|
|
|
|
|
|
for (Strings::const_iterator it = data.begin();
|
|
|
|
it != data.end(); it++)
|
|
|
|
{
|
|
|
|
string s = *it;
|
|
|
|
unsigned int len = s.size();
|
|
|
|
|
|
|
|
d += len & 0xff;
|
|
|
|
d += (len >> 8) & 0xff;
|
|
|
|
d += (len >> 16) & 0xff;
|
|
|
|
d += (len >> 24) & 0xff;
|
|
|
|
|
|
|
|
d += s;
|
|
|
|
}
|
|
|
|
|
2003-07-31 16:47:13 +03:00
|
|
|
setString(txn, table, key, d);
|
2003-07-07 12:25:26 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2003-07-31 19:05:35 +03:00
|
|
|
void Database::delPair(const Transaction & txn, TableId table,
|
2003-05-26 16:45:00 +03:00
|
|
|
const string & key)
|
|
|
|
{
|
|
|
|
try {
|
2003-07-31 19:05:35 +03:00
|
|
|
Db * db = getDb(table);
|
2003-05-26 16:45:00 +03:00
|
|
|
Dbt kt((void *) key.c_str(), key.length());
|
2003-07-31 16:47:13 +03:00
|
|
|
db->del(txn.txn, &kt, 0);
|
2003-10-08 18:06:59 +03:00
|
|
|
/* Non-existence of a pair with the given key is not an
|
|
|
|
error. */
|
2003-05-26 16:45:00 +03:00
|
|
|
} catch (DbException e) { rethrow(e); }
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2003-07-31 19:05:35 +03:00
|
|
|
void Database::enumTable(const Transaction & txn, TableId table,
|
2003-07-17 15:27:55 +03:00
|
|
|
Strings & keys)
|
2003-05-26 16:45:00 +03:00
|
|
|
{
|
|
|
|
try {
|
2003-07-31 19:05:35 +03:00
|
|
|
Db * db = getDb(table);
|
2003-05-26 16:45:00 +03:00
|
|
|
|
2003-07-31 16:47:13 +03:00
|
|
|
Dbc * dbc;
|
2003-07-31 19:05:35 +03:00
|
|
|
db->cursor(txn.txn, &dbc, 0);
|
2003-07-31 16:47:13 +03:00
|
|
|
DestroyDbc destroyDbc(dbc);
|
2003-05-26 16:45:00 +03:00
|
|
|
|
|
|
|
Dbt kt, dt;
|
2003-07-31 16:47:13 +03:00
|
|
|
while (dbc->get(&kt, &dt, DB_NEXT) != DB_NOTFOUND)
|
2003-07-17 15:27:55 +03:00
|
|
|
keys.push_back(
|
|
|
|
string((char *) kt.get_data(), kt.get_size()));
|
2003-05-26 16:45:00 +03:00
|
|
|
|
|
|
|
} catch (DbException e) { rethrow(e); }
|
|
|
|
}
|