Initial db.open.

pull/34/head
Ben Johnson 2014-01-11 22:51:01 -07:00
parent df8333328f
commit ee24437bfc
12 changed files with 4872 additions and 4960 deletions

20
Makefile Normal file
View File

@ -0,0 +1,20 @@
# Package pattern passed to `go test` by the `test` target.
PKG=./...
# Pattern selecting which tests the `test` target runs.
TEST=.
# Pattern selecting which benchmarks the `bench` target runs.
BENCH=.
# Scratch file that holds the coverage profile while `cover` runs.
COVERPROFILE=/tmp/c.out

# bench runs the benchmarks under ./.bench that match $(BENCH).
# The former `benchpreq` prerequisite had no rule in this Makefile, which made
# every `make bench` abort with "No rule to make target", so it was dropped.
bench:
	go test -v -test.bench=$(BENCH) ./.bench

# cover formats the code, writes a coverage profile for the root package,
# renders it as HTML, and finally removes the profile.
cover: fmt
	go test -coverprofile=$(COVERPROFILE) .
	go tool cover -html=$(COVERPROFILE)
	rm $(COVERPROFILE)

# fmt runs gofmt over every package.
fmt:
	@go fmt ./...

# test formats the code, then runs the tests in $(PKG) that match $(TEST)
# with coverage reporting enabled.
test: fmt
	@go test -v -cover -test.run=$(TEST) $(PKG)

# None of the targets produce a file named after themselves.
.PHONY: bench cover fmt test

153
cursor.go
View File

@ -11,7 +11,6 @@ package bolt
// TODO: #define MDB_NOSPILL 0x8000 /** Do not spill pages to disk if txn is getting full, may fail instead */ // TODO: #define MDB_NOSPILL 0x8000 /** Do not spill pages to disk if txn is getting full, may fail instead */
type Cursor interface { type Cursor interface {
First() error First() error
FirstDup() error FirstDup() error
@ -32,13 +31,13 @@ type Cursor interface {
type cursor struct { type cursor struct {
flags int flags int
next *cursor _next *cursor
backup *cursor backup *cursor
xcursor *xcursor xcursor *xcursor
transaction *transaction transaction *transaction
bucketId int bucketId int
bucket *bucket bucket *Bucket
bucketx *bucketx // bucketx *bucketx
bucketFlag int bucketFlag int
snum int snum int
top int top int
@ -48,8 +47,8 @@ type cursor struct {
type xcursor struct { type xcursor struct {
cursor cursor cursor cursor
bucket *bucket bucket *Bucket
bucketx *bucketx // bucketx *bucketx
bucketFlag int bucketFlag int
} }
@ -59,7 +58,7 @@ type xcursor struct {
// P_DIRTY to set P_KEEP, P_DIRTY|P_KEEP to clear it. // P_DIRTY to set P_KEEP, P_DIRTY|P_KEEP to clear it.
// @param[in] all No shortcuts. Needed except after a full #mdb_page_flush(). // @param[in] all No shortcuts. Needed except after a full #mdb_page_flush().
// @return 0 on success, non-zero on failure. // @return 0 on success, non-zero on failure.
func (c *cursor) xkeep(unsigned pflags, int all) int { func (c *cursor) xkeep(pflags int, all int) error {
/* /*
enum { Mask = P_SUBP|P_DIRTY|P_KEEP }; enum { Mask = P_SUBP|P_DIRTY|P_KEEP };
MDB_txn *txn = mc->mc_txn; MDB_txn *txn = mc->mc_txn;
@ -116,7 +115,7 @@ func (c *cursor) xkeep(unsigned pflags, int all) int {
return rc; return rc;
*/ */
return 0 return nil
} }
// Spill pages from the dirty list back to disk. // Spill pages from the dirty list back to disk.
@ -150,7 +149,7 @@ func (c *cursor) xkeep(unsigned pflags, int all) int {
// @param[in] key For a put operation, the key being stored. // @param[in] key For a put operation, the key being stored.
// @param[in] data For a put operation, the data being stored. // @param[in] data For a put operation, the data being stored.
// @return 0 on success, non-zero on failure. // @return 0 on success, non-zero on failure.
func (c *cursor) spill(MDB_val *key, MDB_val *data) int { func (c *cursor) spill(key []byte, data []byte) error {
/* /*
MDB_txn *txn = m0->mc_txn; MDB_txn *txn = m0->mc_txn;
MDB_page *dp; MDB_page *dp;
@ -240,28 +239,28 @@ func (c *cursor) spill(MDB_val *key, MDB_val *data) int {
// Reset any dirty pages we kept that page_flush didn't see // Reset any dirty pages we kept that page_flush didn't see
rc = mdb_pages_xkeep(m0, P_DIRTY|P_KEEP, i); rc = mdb_pages_xkeep(m0, P_DIRTY|P_KEEP, i);
done: done:
txn->mt_flags |= rc ? MDB_TXN_ERROR : MDB_TXN_SPILLS; txn->mt_flags |= rc ? MDB_TXN_ERROR : MDB_TXN_SPILLS;
return rc; return rc;
/* /*
return 0 return 0
} }
// Allocate page numbers and memory for writing. Maintain me_pglast, // Allocate page numbers and memory for writing. Maintain me_pglast,
// me_pghead and mt_next_pgno. // me_pghead and mt_next_pgno.
// //
// If there are free pages available from older transactions, they // If there are free pages available from older transactions, they
// are re-used first. Otherwise allocate a new page at mt_next_pgno. // are re-used first. Otherwise allocate a new page at mt_next_pgno.
// Do not modify the freeDB, just merge freeDB records into me_pghead[] // Do not modify the freeDB, just merge freeDB records into me_pghead[]
// and move me_pglast to say which records were consumed. Only this // and move me_pglast to say which records were consumed. Only this
// function can create me_pghead and move me_pglast/mt_next_pgno. // function can create me_pghead and move me_pglast/mt_next_pgno.
// @param[in] mc cursor A cursor handle identifying the transaction and // @param[in] mc cursor A cursor handle identifying the transaction and
// database for which we are allocating. // database for which we are allocating.
// @param[in] num the number of pages to allocate. // @param[in] num the number of pages to allocate.
// @param[out] mp Address of the allocated page(s). Requests for multiple pages // @param[out] mp Address of the allocated page(s). Requests for multiple pages
// will always be satisfied by a single contiguous chunk of memory. // will always be satisfied by a single contiguous chunk of memory.
// @return 0 on success, non-zero on failure. // @return 0 on success, non-zero on failure.
func (c *cursor) allocPage(int num, MDB_page **mp) { func (c *cursor) allocPage(int num, MDB_page **mp) {
int rc, retry = INT_MAX; int rc, retry = INT_MAX;
MDB_txn *txn = mc->mc_txn; MDB_txn *txn = mc->mc_txn;
MDB_env *env = txn->mt_env; MDB_env *env = txn->mt_env;
@ -341,12 +340,12 @@ func (c *cursor) allocPage(int num, MDB_page **mp) {
mop = env->me_pghead; mop = env->me_pghead;
} }
env->me_pglast = last; env->me_pglast = last;
#if (MDB_DEBUG) > 1 #if (MDB_DEBUG) > 1
DPRINTF(("IDL read txn %"Z"u root %"Z"u num %u", DPRINTF(("IDL read txn %"Z"u root %"Z"u num %u",
last, txn->mt_dbs[FREE_DBI].md_root, i)); last, txn->mt_dbs[FREE_DBI].md_root, i));
for (k = i; k; k--) for (k = i; k; k--)
DPRINTF(("IDL %"Z"u", idl[k])); DPRINTF(("IDL %"Z"u", idl[k]));
#endif #endif
// Merge in descending sorted order // Merge in descending sorted order
j = mop_len; j = mop_len;
k = mop_len += i; k = mop_len += i;
@ -370,7 +369,7 @@ func (c *cursor) allocPage(int num, MDB_page **mp) {
goto fail; goto fail;
} }
search_done: search_done:
if (env->me_flags & MDB_WRITEMAP) { if (env->me_flags & MDB_WRITEMAP) {
np = (MDB_page *)(env->me_map + env->me_psize * pgno); np = (MDB_page *)(env->me_map + env->me_psize * pgno);
} else { } else {
@ -393,11 +392,11 @@ search_done:
return MDB_SUCCESS; return MDB_SUCCESS;
fail: fail:
txn->mt_flags |= MDB_TXN_ERROR; txn->mt_flags |= MDB_TXN_ERROR;
return rc; return rc;
*/ */
return 0 return nil
} }
// Copy the used portions of a non-overflow page. // Copy the used portions of a non-overflow page.
@ -491,7 +490,7 @@ func (c *cursor) page_touch() int {
np->mp_pgno = pgno; np->mp_pgno = pgno;
np->mp_flags |= P_DIRTY; np->mp_flags |= P_DIRTY;
done: done:
// Adjust cursors pointing to mp // Adjust cursors pointing to mp
mc->mc_pg[mc->mc_top] = np; mc->mc_pg[mc->mc_top] = np;
m2 = txn->mt_cursors[mc->mc_dbi]; m2 = txn->mt_cursors[mc->mc_dbi];
@ -519,7 +518,7 @@ done:
} }
return 0; return 0;
fail: fail:
txn->mt_flags |= MDB_TXN_ERROR; txn->mt_flags |= MDB_TXN_ERROR;
return rc; return rc;
*/ */
@ -588,14 +587,14 @@ func (c *cursor) search(key []byte) (*node, bool) {
nodekey.mv_data = NODEKEY(node); nodekey.mv_data = NODEKEY(node);
rc = cmp(key, &nodekey); rc = cmp(key, &nodekey);
#if MDB_DEBUG #if MDB_DEBUG
if (IS_LEAF(mp)) if (IS_LEAF(mp))
DPRINTF(("found leaf index %u [%s], rc = %i", DPRINTF(("found leaf index %u [%s], rc = %i",
i, DKEY(&nodekey), rc)); i, DKEY(&nodekey), rc));
else else
DPRINTF(("found branch index %u [%s -> %"Z"u], rc = %i", DPRINTF(("found branch index %u [%s -> %"Z"u], rc = %i",
i, DKEY(&nodekey), NODEPGNO(node), rc)); i, DKEY(&nodekey), NODEPGNO(node), rc));
#endif #endif
if (rc == 0) if (rc == 0)
break; break;
if (rc > 0) if (rc > 0)
@ -627,9 +626,9 @@ func (c *cursor) search(key []byte) (*node, bool) {
func (c *cursor) pop() { func (c *cursor) pop() {
/* /*
if (mc->mc_snum) { if (mc->mc_snum) {
#if MDB_DEBUG #if MDB_DEBUG
MDB_page *top = mc->mc_pg[mc->mc_top]; MDB_page *top = mc->mc_pg[mc->mc_top];
#endif #endif
mc->mc_snum--; mc->mc_snum--;
if (mc->mc_snum) if (mc->mc_snum)
mc->mc_top--; mc->mc_top--;
@ -748,6 +747,7 @@ func (c *cursor) searchLowest() error {
return rc; return rc;
return mdb_page_search_root(mc, NULL, MDB_PS_FIRST); return mdb_page_search_root(mc, NULL, MDB_PS_FIRST);
*/ */
return nil
} }
// Search for the page a given key should be in. // Search for the page a given key should be in.
@ -886,7 +886,7 @@ func (c *cursor) freeOverflowPage(p *page) error {
} }
if (!(env->me_flags & MDB_WRITEMAP)) if (!(env->me_flags & MDB_WRITEMAP))
mdb_dpage_free(env, mp); mdb_dpage_free(env, mp);
release: release:
// Insert in me_pghead // Insert in me_pghead
mop = env->me_pghead; mop = env->me_pghead;
j = mop[0] + ovpages; j = mop[0] + ovpages;
@ -906,7 +906,6 @@ release:
return nil return nil
} }
// Find a sibling for a page. // Find a sibling for a page.
// Replaces the page at the top of the cursor's stack with the // Replaces the page at the top of the cursor's stack with the
// specified sibling, if one exists. // specified sibling, if one exists.
@ -1013,7 +1012,7 @@ func (c *cursor) next(key []byte, data []byte, op int) error {
} else } else
mc->mc_ki[mc->mc_top]++; mc->mc_ki[mc->mc_top]++;
skip: skip:
DPRINTF(("==> cursor points to page %"Z"u with %u keys, key index %u", DPRINTF(("==> cursor points to page %"Z"u with %u keys, key index %u",
mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top])); mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top]));
@ -1120,6 +1119,7 @@ func (c *cursor) prev(key []byte, data []byte, op int) error {
MDB_GET_KEY(leaf, key); MDB_GET_KEY(leaf, key);
return MDB_SUCCESS; return MDB_SUCCESS;
*/ */
return nil
} }
// Set the cursor on a specific data item. // Set the cursor on a specific data item.
@ -1233,7 +1233,7 @@ func (c *cursor) set(key []byte, data []byte, op int) (error, bool) {
mp = mc->mc_pg[mc->mc_top]; mp = mc->mc_pg[mc->mc_top];
mdb_cassert(mc, IS_LEAF(mp)); mdb_cassert(mc, IS_LEAF(mp));
set2: set2:
leaf = mdb_node_search(mc, key, exactp); leaf = mdb_node_search(mc, key, exactp);
if (exactp != NULL && !*exactp) { if (exactp != NULL && !*exactp) {
// MDB_SET specified and not an exact match. // MDB_SET specified and not an exact match.
@ -1249,7 +1249,7 @@ set2:
leaf = NODEPTR(mp, 0); leaf = NODEPTR(mp, 0);
} }
set1: set1:
mc->mc_flags |= C_INITIALIZED; mc->mc_flags |= C_INITIALIZED;
mc->mc_flags &= ~C_EOF; mc->mc_flags &= ~C_EOF;
@ -1306,7 +1306,7 @@ set1:
return rc; return rc;
*/ */
return nil return nil, false
} }
// Move the cursor to the first item in the database. // Move the cursor to the first item in the database.
@ -1356,6 +1356,7 @@ func (c *cursor) first(key []byte, data []byte) error {
// Move the cursor to the last item in the database. // Move the cursor to the last item in the database.
func (c *cursor) last() ([]byte, []byte) { func (c *cursor) last() ([]byte, []byte) {
/*
int rc; int rc;
MDB_node *leaf; MDB_node *leaf;
@ -1396,6 +1397,8 @@ func (c *cursor) last() ([]byte, []byte) {
MDB_GET_KEY(leaf, key); MDB_GET_KEY(leaf, key);
return MDB_SUCCESS; return MDB_SUCCESS;
*/
return nil, nil
} }
func (c *cursor) Get(key []byte, data []byte, op int) ([]byte, []byte, error) { func (c *cursor) Get(key []byte, data []byte, op int) ([]byte, []byte, error) {
@ -1492,7 +1495,7 @@ func (c *cursor) Get(key []byte, data []byte, op int) ([]byte, []byte, error) {
if (rc == MDB_SUCCESS) { if (rc == MDB_SUCCESS) {
if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) {
MDB_cursor *mx; MDB_cursor *mx;
fetchm: fetchm:
mx = &mc->mc_xcursor->mx_cursor; mx = &mc->mc_xcursor->mx_cursor;
data->mv_size = NUMKEYS(mx->mc_pg[mx->mc_top]) * data->mv_size = NUMKEYS(mx->mc_pg[mx->mc_top]) *
mx->mc_db->md_pad; mx->mc_db->md_pad;
@ -1587,12 +1590,12 @@ func (c *cursor) touch() error {
mc->mc_top = mc->mc_snum-1; mc->mc_top = mc->mc_snum-1;
} }
return rc; return rc;
} }
int int
mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
unsigned int flags) unsigned int flags)
{ {
enum { MDB_NO_ROOT = MDB_LAST_ERRCODE+10 }; // internal code enum { MDB_NO_ROOT = MDB_LAST_ERRCODE+10 }; // internal code
MDB_env *env; MDB_env *env;
MDB_node *leaf = NULL; MDB_node *leaf = NULL;
@ -1630,13 +1633,13 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
if (flags != MDB_CURRENT && key->mv_size-1 >= ENV_MAXKEY(env)) if (flags != MDB_CURRENT && key->mv_size-1 >= ENV_MAXKEY(env))
return MDB_BAD_VALSIZE; return MDB_BAD_VALSIZE;
#if SIZE_MAX > MAXDATASIZE #if SIZE_MAX > MAXDATASIZE
if (data->mv_size > ((mc->mc_db->md_flags & MDB_DUPSORT) ? ENV_MAXKEY(env) : MAXDATASIZE)) if (data->mv_size > ((mc->mc_db->md_flags & MDB_DUPSORT) ? ENV_MAXKEY(env) : MAXDATASIZE))
return MDB_BAD_VALSIZE; return MDB_BAD_VALSIZE;
#else #else
if ((mc->mc_db->md_flags & MDB_DUPSORT) && data->mv_size > ENV_MAXKEY(env)) if ((mc->mc_db->md_flags & MDB_DUPSORT) && data->mv_size > ENV_MAXKEY(env))
return MDB_BAD_VALSIZE; return MDB_BAD_VALSIZE;
#endif #endif
DPRINTF(("==> put db %d key [%s], size %"Z"u, data size %"Z"u", DPRINTF(("==> put db %d key [%s], size %"Z"u, data size %"Z"u",
DDBI(mc), DKEY(key), key ? key->mv_size : 0, data->mv_size)); DDBI(mc), DKEY(key), key ? key->mv_size : 0, data->mv_size));
@ -1745,7 +1748,7 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
return MDB_SUCCESS; return MDB_SUCCESS;
} }
more: more:
leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
olddata.mv_size = NODEDSZ(leaf); olddata.mv_size = NODEDSZ(leaf);
olddata.mv_data = NODEDATA(leaf); olddata.mv_data = NODEDATA(leaf);
@ -1766,14 +1769,14 @@ more:
if (flags == MDB_CURRENT) if (flags == MDB_CURRENT)
goto current; goto current;
#if UINT_MAX < SIZE_MAX #if UINT_MAX < SIZE_MAX
if (mc->mc_dbx->md_dcmp == mdb_cmp_int && olddata.mv_size == sizeof(size_t)) if (mc->mc_dbx->md_dcmp == mdb_cmp_int && olddata.mv_size == sizeof(size_t))
#ifdef MISALIGNED_OK #ifdef MISALIGNED_OK
mc->mc_dbx->md_dcmp = mdb_cmp_long; mc->mc_dbx->md_dcmp = mdb_cmp_long;
#else #else
mc->mc_dbx->md_dcmp = mdb_cmp_cint; mc->mc_dbx->md_dcmp = mdb_cmp_cint;
#endif #endif
#endif #endif
// if data matches, skip it // if data matches, skip it
if (!mc->mc_dbx->md_dcmp(data, &olddata)) { if (!mc->mc_dbx->md_dcmp(data, &olddata)) {
if (flags & MDB_NODUPDATA) if (flags & MDB_NODUPDATA)
@ -1837,7 +1840,7 @@ more:
if (NODESIZE + NODEKSZ(leaf) + xdata.mv_size > env->me_nodemax) { if (NODESIZE + NODEKSZ(leaf) + xdata.mv_size > env->me_nodemax) {
// Too big for a sub-page, convert to sub-DB // Too big for a sub-page, convert to sub-DB
fp_flags &= ~P_SUBP; fp_flags &= ~P_SUBP;
prep_subDB: prep_subDB:
if (mc->mc_db->md_flags & MDB_DUPFIXED) { if (mc->mc_db->md_flags & MDB_DUPFIXED) {
fp_flags |= P_LEAF2; fp_flags |= P_LEAF2;
dummy.md_pad = fp->mp_pad; dummy.md_pad = fp->mp_pad;
@ -1883,7 +1886,7 @@ prep_subDB:
mdb_node_del(mc, 0); mdb_node_del(mc, 0);
goto new_sub; goto new_sub;
} }
current: current:
// overflow page overwrites need special handling // overflow page overwrites need special handling
if (F_ISSET(leaf->mn_flags, F_BIGDATA)) { if (F_ISSET(leaf->mn_flags, F_BIGDATA)) {
MDB_page *omp; MDB_page *omp;
@ -1960,7 +1963,7 @@ current:
rdata = data; rdata = data;
new_sub: new_sub:
nflags = flags & NODE_ADD_FLAGS; nflags = flags & NODE_ADD_FLAGS;
nsize = IS_LEAF2(mc->mc_pg[mc->mc_top]) ? key->mv_size : mdb_leaf_size(env, key, rdata); nsize = IS_LEAF2(mc->mc_pg[mc->mc_top]) ? key->mv_size : mdb_leaf_size(env, key, rdata);
if (SIZELEFT(mc->mc_pg[mc->mc_top]) < nsize) { if (SIZELEFT(mc->mc_pg[mc->mc_top]) < nsize) {
@ -2001,7 +2004,7 @@ new_sub:
// DB are all zero size. // DB are all zero size.
if (do_sub) { if (do_sub) {
int xflags; int xflags;
put_sub: put_sub:
xdata.mv_size = 0; xdata.mv_size = 0;
xdata.mv_data = ""; xdata.mv_data = "";
leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
@ -2048,7 +2051,7 @@ put_sub:
mc->mc_db->md_entries++; mc->mc_db->md_entries++;
if (flags & MDB_MULTIPLE) { if (flags & MDB_MULTIPLE) {
if (!rc) { if (!rc) {
next_mult: next_mult:
mcount++; mcount++;
// let caller know how many succeeded, if any // let caller know how many succeeded, if any
data[1].mv_size = mcount; data[1].mv_size = mcount;
@ -2059,7 +2062,7 @@ next_mult:
} }
} }
} }
done: done:
// If we succeeded and the key didn't exist before, make sure // If we succeeded and the key didn't exist before, make sure
// the cursor is marked valid. // the cursor is marked valid.
if (!rc && insert) if (!rc && insert)
@ -2256,7 +2259,7 @@ func (c *cursor) addNode(index int, key []byte, data []byte, pgno int, flags int
if ((ssize_t)node_size > room) if ((ssize_t)node_size > room)
goto full; goto full;
update: update:
// Move higher pointers up one slot. // Move higher pointers up one slot.
for (i = NUMKEYS(mp); i > indx; i--) for (i = NUMKEYS(mp); i > indx; i--)
mp->mp_ptrs[i] = mp->mp_ptrs[i - 1]; mp->mp_ptrs[i] = mp->mp_ptrs[i - 1];
@ -2303,7 +2306,7 @@ update:
return MDB_SUCCESS; return MDB_SUCCESS;
full: full:
DPRINTF(("not enough room in page %"Z"u, got %u ptrs", DPRINTF(("not enough room in page %"Z"u, got %u ptrs",
mdb_dbg_pgno(mp), NUMKEYS(mp))); mdb_dbg_pgno(mp), NUMKEYS(mp)));
DPRINTF(("upper-lower = %u - %u = %"Z"d", mp->mp_upper,mp->mp_lower,room)); DPRINTF(("upper-lower = %u - %u = %"Z"d", mp->mp_upper,mp->mp_lower,room));
@ -2369,7 +2372,7 @@ func (c *cursor) deleteNode(ksize int) {
mp->mp_lower -= sizeof(indx_t); mp->mp_lower -= sizeof(indx_t);
mp->mp_upper += sz; mp->mp_upper += sz;
*/ */
} }
// Initial setup of a sorted-dups cursor. // Initial setup of a sorted-dups cursor.
@ -2441,19 +2444,19 @@ func (c *cursor) xcursor_init1(n *node) {
DPRINTF(("Sub-db -%u root page %"Z"u", mx->mx_cursor.mc_dbi, DPRINTF(("Sub-db -%u root page %"Z"u", mx->mx_cursor.mc_dbi,
mx->mx_db.md_root)); mx->mx_db.md_root));
mx->mx_dbflag = DB_VALID|DB_DIRTY; // DB_DIRTY guides mdb_cursor_touch mx->mx_dbflag = DB_VALID|DB_DIRTY; // DB_DIRTY guides mdb_cursor_touch
#if UINT_MAX < SIZE_MAX #if UINT_MAX < SIZE_MAX
if (mx->mx_dbx.md_cmp == mdb_cmp_int && mx->mx_db.md_pad == sizeof(size_t)) if (mx->mx_dbx.md_cmp == mdb_cmp_int && mx->mx_db.md_pad == sizeof(size_t))
#ifdef MISALIGNED_OK #ifdef MISALIGNED_OK
mx->mx_dbx.md_cmp = mdb_cmp_long; mx->mx_dbx.md_cmp = mdb_cmp_long;
#else #else
mx->mx_dbx.md_cmp = mdb_cmp_cint; mx->mx_dbx.md_cmp = mdb_cmp_cint;
#endif #endif
#endif #endif
*/ */
} }
// Initialize a cursor for a given transaction and database. // Initialize a cursor for a given transaction and database.
func (c *cursor) init(t *transaction, bucket *bucket, mx *xcursor) { func (c *cursor) init(t *transaction, bucket *Bucket, mx *xcursor) {
/* /*
mc->mc_next = NULL; mc->mc_next = NULL;
mc->mc_backup = NULL; mc->mc_backup = NULL;
@ -2527,7 +2530,7 @@ func (c *cursor) Transaction() Transaction {
return nil return nil
} }
func (c *cursor) Bucket() Bucket { func (c *cursor) Bucket() *Bucket {
return c.bucket return c.bucket
} }
@ -2549,7 +2552,7 @@ func (c *cursor) updateKey(key []byte) error {
mp = mc->mc_pg[mc->mc_top]; mp = mc->mc_pg[mc->mc_top];
node = NODEPTR(mp, indx); node = NODEPTR(mp, indx);
ptr = mp->mp_ptrs[indx]; ptr = mp->mp_ptrs[indx];
#if MDB_DEBUG #if MDB_DEBUG
{ {
MDB_val k2; MDB_val k2;
char kbuf2[DKBUF_MAXKEYSIZE*2+1]; char kbuf2[DKBUF_MAXKEYSIZE*2+1];
@ -2561,7 +2564,7 @@ func (c *cursor) updateKey(key []byte) error {
DKEY(key), DKEY(key),
mp->mp_pgno)); mp->mp_pgno));
} }
#endif #endif
// Sizes must be 2-byte aligned. // Sizes must be 2-byte aligned.
ksize = EVEN(key->mv_size); ksize = EVEN(key->mv_size);

444
db.go
View File

@ -1,5 +1,12 @@
package bolt package bolt
import (
"os"
"sync"
"syscall"
"unsafe"
)
const ( const (
NoSync = iota NoSync = iota
NoMetaSync NoMetaSync
@ -8,59 +15,54 @@ const (
IntegerDupKey IntegerDupKey
) )
var DatabaseAlreadyOpenedError = &Error{"Database already open"} var DatabaseAlreadyOpenedError = &Error{"Database already open", nil}
// TODO: #define MDB_FATAL_ERROR 0x80000000U /** Failed to update the meta page. Probably an I/O error. */ // TODO: #define MDB_FATAL_ERROR 0x80000000U /** Failed to update the meta page. Probably an I/O error. */
// TODO: #define MDB_ENV_ACTIVE 0x20000000U /** Some fields are initialized. */ // TODO: #define MDB_ENV_ACTIVE 0x20000000U /** Some fields are initialized. */
// TODO: #define MDB_ENV_TXKEY 0x10000000U /** me_txkey is set */ // TODO: #define MDB_ENV_TXKEY 0x10000000U /** me_txkey is set */
// TODO: #define MDB_LIVE_READER 0x08000000U /** Have liveness lock in reader table */ // TODO: #define MDB_LIVE_READER 0x08000000U /** Have liveness lock in reader table */
type DB interface { type DB struct {
syncEnabled bool
metaSyncEnabled bool
}
type db struct {
sync.Mutex sync.Mutex
opened bool opened bool
file os.File file *os.File
metafile os.File metafile *os.File
data []byte
buf []byte buf []byte
meta0 *meta
meta1 *meta
pageSize int pageSize int
readers []*reader readers []*reader
buckets []*bucket buckets []*Bucket
xbuckets []*bucketx /**< array of static DB info */ // xbuckets []*bucketx /**< array of static DB info */
bucketFlags []int /**< array of flags from MDB_db.md_flags */ bucketFlags []int /**< array of flags from MDB_db.md_flags */
path string path string
mmap []byte
mmapSize int /**< size of the data memory map */ mmapSize int /**< size of the data memory map */
size int /**< current file size */ size int /**< current file size */
meta1 []byte
meta2 []byte
pbuf []byte pbuf []byte
transaction *transaction /**< current write transaction */ transaction *transaction /**< current write transaction */
maxPageNumber int /**< me_mapsize / me_psize */ maxPageNumber int /**< me_mapsize / me_psize */
pageState pageStage /**< state of old pages from freeDB */ pageState pageState /**< state of old pages from freeDB */
dpages []*page /**< list of malloc'd blocks for re-use */ dpages []*page /**< list of malloc'd blocks for re-use */
freePages []int /** IDL of pages that became unused in a write txn */ freePages []int /** IDL of pages that became unused in a write txn */
dirtyPages []int /** ID2L of pages written during a write txn. Length MDB_IDL_UM_SIZE. */ dirtyPages []int /** ID2L of pages written during a write txn. Length MDB_IDL_UM_SIZE. */
maxFreeOnePage int /** Max number of freelist items that can fit in a single overflow page */ maxFreeOnePage int /** Max number of freelist items that can fit in a single overflow page */
maxPageDataSize int
maxNodeSize int /** Max size of a node on a page */ maxNodeSize int /** Max size of a node on a page */
maxKeySize int /**< max size of a key */ maxKeySize int /**< max size of a key */
} }
func NewDB() *DB {
func NewDB() DB { return &DB{}
return &db{}
} }
func (db *db) Path() string { func (db *DB) Path() string {
return db.path return db.path
} }
func (db *db) Open(path string, mode os.FileMode) error { func (db *DB) Open(path string, mode os.FileMode) error {
var err error var err error
db.Lock() db.Lock()
defer db.Unlock() defer db.Unlock()
@ -72,24 +74,24 @@ func (db *db) Open(path string, mode os.FileMode) error {
// Open data file and separate sync handler for metadata writes. // Open data file and separate sync handler for metadata writes.
db.path = path db.path = path
if db.file, err = os.OpenFile(db.path, O_RDWR | O_CREAT, mode); err != nil { if db.file, err = os.OpenFile(db.path, os.O_RDWR|os.O_CREATE, mode); err != nil {
db.close() db.close()
return err return err
} }
if db.metafile, err = os.OpenFile(db.path, O_RDWR | O_SYNC, mode); err != nil { if db.metafile, err = os.OpenFile(db.path, os.O_RDWR|os.O_SYNC, mode); err != nil {
db.close() db.close()
return err return err
} }
// Read enough data to get both meta pages. // Read enough data to get both meta pages.
var m, m0, m1 *meta var m, m0, m1 *meta
var buf [headerSize + unsafe.Sizeof(meta)]byte var buf [pageHeaderSize + int(unsafe.Sizeof(meta{}))]byte
if _, err := db.file.ReadAt(buf, 0); err == nil { if _, err := db.file.ReadAt(buf[:], 0); err == nil {
if m0, _ = db.page(buf[:], 0).meta(); m0 != nil { if m0, _ = db.page(buf[:], 0).meta(); m0 != nil {
db.pageSize = m0.free.pad db.pageSize = int(m0.free.pad)
} }
} }
if _, err := db.file.ReadAt(buf, db.pageSize); err == nil { if _, err := db.file.ReadAt(buf[:], int64(db.pageSize)); err == nil {
m1, _ = db.page(buf[:], 0).meta() m1, _ = db.page(buf[:], 0).meta()
} }
if m0 != nil && m1 != nil { if m0 != nil && m1 != nil {
@ -102,27 +104,16 @@ func (db *db) Open(path string, mode os.FileMode) error {
// Initialize the page size for new environments. // Initialize the page size for new environments.
if m == nil { if m == nil {
db.pageSize = os.Getpagesize() if err := db.init(); err != nil {
if db.pageSize > maxPageSize { return err
db.pageSize = maxPageSize
} }
} }
// TODO: Check mapsize. // Initialize db fields.
/* db.buf = make([]byte, db.pageSize)
// Was a mapsize configured? db.maxPageDataSize = ((db.pageSize - pageHeaderSize) / int(unsafe.Sizeof(pgno(0)))) - 1
if (!env->me_mapsize) { db.maxNodeSize = (((db.pageSize - pageHeaderSize) / minKeyCount) & -2) - int(unsafe.Sizeof(indx(0)))
// If this is a new environment, take the default, // TODO?: env->me_maxpg = env->me_mapsize / env->me_psize;
// else use the size recorded in the existing env.
env->me_mapsize = newenv ? DEFAULT_MAPSIZE : meta.mm_mapsize;
} else if (env->me_mapsize < meta.mm_mapsize) {
// If the configured size is smaller, make sure it's
// still big enough. Silently round up to minimum if not.
size_t minsize = (meta.mm_last_pg + 1) * meta.mm_psize;
if (env->me_mapsize < minsize)
env->me_mapsize = minsize;
}
*/
// Memory map the data file. // Memory map the data file.
if err := db.mmap(); err != nil { if err := db.mmap(); err != nil {
@ -130,94 +121,87 @@ func (db *db) Open(path string, mode os.FileMode) error {
return err return err
} }
// Initialize the buffer. // TODO: Initialize meta.
db.buf = make([]byte, db.pageSize) // if (newenv) {
// i = mdb_env_init_meta(env, &meta);
// if (i != MDB_SUCCESS) {
// return i;
// }
// }
// Mark the database as opened and return. // Mark the database as opened and return.
db.opened = true db.opened = true
return nil return nil
} }
// Read the meta pages and return the latest. // int mdb_env_map(MDB_env *env, void *addr, int newsize)
func (db *db) readMeta() *meta { func (db *DB) mmap() error {
m := &meta{} var err error
m.read()
/* // Determine the map size based on the file size.
if ((i = mdb_env_read_header(env, &meta)) != 0) { var size int
if (i != ENOENT) if info, err := os.Stat(db.file.Name()); err != nil {
return i; return err
DPUTS("new mdbenv"); } else if info.Size() < int64(db.pageSize*2) {
newenv = 1; return &Error{"file size too small", nil}
env->me_psize = env->me_os_psize;
if (env->me_psize > MAX_PAGESIZE)
env->me_psize = MAX_PAGESIZE;
} else { } else {
env->me_psize = meta.mm_psize; size = int(info.Size())
} }
// Memory-map the data file as a byte slice.
rc = mdb_env_map(env, meta.mm_address, newenv); if db.data, err = syscall.Mmap(int(db.file.Fd()), 0, size, syscall.PROT_READ, syscall.MAP_SHARED); err != nil {
if (rc) return err
return rc;
if (newenv) {
if (flags & MDB_FIXEDMAP)
meta.mm_address = env->me_map;
i = mdb_env_init_meta(env, &meta);
if (i != MDB_SUCCESS) {
return i;
}
} }
env->me_maxfree_1pg = (env->me_psize - PAGEHDRSZ) / sizeof(pgno_t) - 1; // TODO?: If nordahead, then: madvise(env->me_map, env->me_mapsize, MADV_RANDOM);
env->me_nodemax = (((env->me_psize - PAGEHDRSZ) / MDB_MINKEYS) & -2)
- sizeof(indx_t);
#if !(MDB_MAXKEYSIZE)
env->me_maxkey = env->me_nodemax - (NODESIZE + sizeof(MDB_db));
#endif
env->me_maxpg = env->me_mapsize / env->me_psize;
#if MDB_DEBUG // Save references to the meta pages.
{ if db.meta0, err = db.page(db.data, 0).meta(); err != nil {
int toggle = mdb_env_pick_meta(env); return &Error{"meta0 error", err}
MDB_db *db = &env->me_metas[toggle]->mm_dbs[MAIN_DBI]; }
if db.meta1, err = db.page(db.data, 1).meta(); err != nil {
DPRINTF(("opened database version %u, pagesize %u", return &Error{"meta1 error", err}
env->me_metas[0]->mm_version, env->me_psize));
DPRINTF(("using meta page %d", toggle));
DPRINTF(("depth: %u", db->md_depth));
DPRINTF(("entries: %"Z"u", db->md_entries));
DPRINTF(("branch pages: %"Z"u", db->md_branch_pages));
DPRINTF(("leaf pages: %"Z"u", db->md_leaf_pages));
DPRINTF(("overflow pages: %"Z"u", db->md_overflow_pages));
DPRINTF(("root: %"Z"u", db->md_root));
} }
#endif
return MDB_SUCCESS;
*/
return nil return nil
} }
// page retrieves a page reference from a given byte array based on the current page size. // init creates a new database file and initializes its meta pages.
func (db *db) page(b []byte, id int) *page { func (db *DB) init() error {
return (*page)(unsafe.Pointer(b[id * db.pageSize])) // Set the page size to the OS page size unless that is larger than max page size.
db.pageSize = os.Getpagesize()
if db.pageSize > maxPageSize {
db.pageSize = maxPageSize
}
// Create two meta pages on a buffer.
buf := make([]byte, db.pageSize*2)
for i := 0; i < 2; i++ {
p := db.page(buf[:], i)
p.id = pgno(i)
p.initMeta(db.pageSize)
}
// Write the buffer to our data file.
if _, err := db.metafile.WriteAt(buf, 0); err != nil {
return err
}
return nil
} }
// close releases the OS resources held by the database: the memory map of the
// data file, the metadata file handle and the data file handle. Each released
// field is reset to nil, so calling close again is a no-op.
//
// close does not acquire the DB mutex; Open calls it while already holding
// the lock.
func (db *DB) close() {
	// The meta page references point into the mapped region, so they are
	// dropped together with the mapping.
	db.meta0 = nil
	db.meta1 = nil
	if db.data != nil {
		// close has no error return, so an unmap failure cannot be reported
		// and is deliberately ignored.
		_ = syscall.Munmap(db.data)
		db.data = nil
	}

	// Close errors are ignored for the same reason as above.
	if db.metafile != nil {
		_ = db.metafile.Close()
		db.metafile = nil
	}
	if db.file != nil {
		_ = db.file.Close()
		db.file = nil
	}

	db.opened = false
}
// page reinterprets the bytes of b that start at offset id*db.pageSize as a
// page and returns a pointer to them. The returned page aliases b; nothing is
// copied.
func (db *DB) page(b []byte, id int) *page {
	offset := id * db.pageSize
	start := &b[offset]
	return (*page)(unsafe.Pointer(start))
}
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ CONVERTED ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ // // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ CONVERTED ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
func (db *DB) freePage(p *page) {
func (db *db) freePage(p *page) {
/* /*
mp->mp_next = env->me_dpages; mp->mp_next = env->me_dpages;
VGMEMP_FREE(env, mp); VGMEMP_FREE(env, mp);
@ -225,7 +209,7 @@ func (db *db) freePage(p *page) {
*/ */
} }
func (db *db) freeDirtyPage(p *page) { func (db *DB) freeDirtyPage(p *page) {
/* /*
if (!IS_OVERFLOW(dp) || dp->mp_pages == 1) { if (!IS_OVERFLOW(dp) || dp->mp_pages == 1) {
mdb_page_free(env, dp); mdb_page_free(env, dp);
@ -237,7 +221,7 @@ func (db *db) freeDirtyPage(p *page) {
*/ */
} }
func (db *db) freeAllDirtyPages(p *page) { func (db *DB) freeAllDirtyPages(p *page) {
/* /*
MDB_env *env = txn->mt_env; MDB_env *env = txn->mt_env;
MDB_ID2L dl = txn->mt_u.dirty_list; MDB_ID2L dl = txn->mt_u.dirty_list;
@ -250,7 +234,7 @@ func (db *db) freeAllDirtyPages(p *page) {
*/ */
} }
func (db *db) sync(force bool) error { func (db *DB) sync(force bool) error {
/* /*
int rc = 0; int rc = 0;
if (force || !F_ISSET(env->me_flags, MDB_NOSYNC)) { if (force || !F_ISSET(env->me_flags, MDB_NOSYNC)) {
@ -259,10 +243,10 @@ func (db *db) sync(force bool) error {
? MS_ASYNC : MS_SYNC; ? MS_ASYNC : MS_SYNC;
if (MDB_MSYNC(env->me_map, env->me_mapsize, flags)) if (MDB_MSYNC(env->me_map, env->me_mapsize, flags))
rc = ErrCode(); rc = ErrCode();
#ifdef _WIN32 #ifdef _WIN32
else if (flags == MS_SYNC && MDB_FDATASYNC(env->me_fd)) else if (flags == MS_SYNC && MDB_FDATASYNC(env->me_fd))
rc = ErrCode(); rc = ErrCode();
#endif #endif
} else { } else {
if (MDB_FDATASYNC(env->me_fd)) if (MDB_FDATASYNC(env->me_fd))
rc = ErrCode(); rc = ErrCode();
@ -273,7 +257,7 @@ func (db *db) sync(force bool) error {
return nil return nil
} }
func (db *db) Transaction(parent *transaction, flags int) (*transaction, error) { func (db *DB) Transaction(parent *transaction, flags int) (*transaction, error) {
/* /*
MDB_txn *txn; MDB_txn *txn;
MDB_ntxn *ntxn; MDB_ntxn *ntxn;
@ -367,80 +351,20 @@ func (db *db) Transaction(parent *transaction, flags int) (*transaction, error)
return rc; return rc;
*/ */
return nil return nil, nil
}
// Write the environment parameters of a freshly created DB environment.
// @param[in] env the environment handle
// @param[out] meta address of where to store the meta information
// @return 0 on success, non-zero on failure.
func (db *db) initMeta(meta *meta) error {
/*
MDB_page *p, *q;
int rc;
unsigned int psize;
#ifdef _WIN32
DWORD len;
OVERLAPPED ov;
memset(&ov, 0, sizeof(ov));
#define DO_PWRITE(rc, fd, ptr, size, len, pos) do { \
ov.Offset = pos; \
rc = WriteFile(fd, ptr, size, &len, &ov); } while(0)
#else
int len;
#define DO_PWRITE(rc, fd, ptr, size, len, pos) do { \
len = pwrite(fd, ptr, size, pos); \
rc = (len >= 0); } while(0)
#endif
DPUTS("writing new meta page");
psize = env->me_psize;
meta->mm_magic = MDB_MAGIC;
meta->mm_version = MDB_DATA_VERSION;
meta->mm_mapsize = env->me_mapsize;
meta->mm_psize = psize;
meta->mm_last_pg = 1;
meta->mm_flags = env->me_flags & 0xffff;
meta->mm_flags |= MDB_INTEGERKEY;
meta->mm_dbs[0].md_root = P_INVALID;
meta->mm_dbs[1].md_root = P_INVALID;
p = calloc(2, psize);
p->mp_pgno = 0;
p->mp_flags = P_META;
*(MDB_meta *)METADATA(p) = *meta;
q = (MDB_page *)((char *)p + psize);
q->mp_pgno = 1;
q->mp_flags = P_META;
*(MDB_meta *)METADATA(q) = *meta;
DO_PWRITE(rc, env->me_fd, p, psize * 2, len, 0);
if (!rc)
rc = ErrCode();
else if ((unsigned) len == psize * 2)
rc = MDB_SUCCESS;
else
rc = ENOSPC;
free(p);
return rc;
*/
return nil
} }
// Check both meta pages to see which one is newer. // Check both meta pages to see which one is newer.
// @param[in] env the environment handle // @param[in] env the environment handle
// @return meta toggle (0 or 1). // @return meta toggle (0 or 1).
func (db *db) pickMeta() int { func (db *DB) pickMeta() int {
/* /*
return (env->me_metas[0]->mm_txnid < env->me_metas[1]->mm_txnid); return (env->me_metas[0]->mm_txnid < env->me_metas[1]->mm_txnid);
*/ */
return 0 return 0
} }
func (db *db) Create() error { func (db *DB) Create() error {
/* /*
MDB_env *e; MDB_env *e;
@ -453,10 +377,10 @@ func (db *db) Create() error {
e->me_fd = INVALID_HANDLE_VALUE; e->me_fd = INVALID_HANDLE_VALUE;
e->me_lfd = INVALID_HANDLE_VALUE; e->me_lfd = INVALID_HANDLE_VALUE;
e->me_mfd = INVALID_HANDLE_VALUE; e->me_mfd = INVALID_HANDLE_VALUE;
#ifdef MDB_USE_POSIX_SEM #ifdef MDB_USE_POSIX_SEM
e->me_rmutex = SEM_FAILED; e->me_rmutex = SEM_FAILED;
e->me_wmutex = SEM_FAILED; e->me_wmutex = SEM_FAILED;
#endif #endif
e->me_pid = getpid(); e->me_pid = getpid();
GET_PAGESIZE(e->me_os_psize); GET_PAGESIZE(e->me_os_psize);
VGMEMP_CREATE(e,0,0); VGMEMP_CREATE(e,0,0);
@ -466,81 +390,7 @@ func (db *db) Create() error {
return nil return nil
} }
// int mdb_env_map(MDB_env *env, void *addr, int newsize) func (db *DB) setMapSize(size int) error {
func (db *db) mmap(newsize int) error {
/*
MDB_page *p;
unsigned int flags = env->me_flags;
#ifdef _WIN32
int rc;
HANDLE mh;
LONG sizelo, sizehi;
sizelo = env->me_mapsize & 0xffffffff;
sizehi = env->me_mapsize >> 16 >> 16; // only needed on Win64
// Windows won't create mappings for zero length files.
// Just allocate the maxsize right now.
if (newsize) {
if (SetFilePointer(env->me_fd, sizelo, &sizehi, 0) != (DWORD)sizelo
|| !SetEndOfFile(env->me_fd)
|| SetFilePointer(env->me_fd, 0, NULL, 0) != 0)
return ErrCode();
}
mh = CreateFileMapping(env->me_fd, NULL, flags & MDB_WRITEMAP ?
PAGE_READWRITE : PAGE_READONLY,
sizehi, sizelo, NULL);
if (!mh)
return ErrCode();
env->me_map = MapViewOfFileEx(mh, flags & MDB_WRITEMAP ?
FILE_MAP_WRITE : FILE_MAP_READ,
0, 0, env->me_mapsize, addr);
rc = env->me_map ? 0 : ErrCode();
CloseHandle(mh);
if (rc)
return rc;
#else
int prot = PROT_READ;
if (flags & MDB_WRITEMAP) {
prot |= PROT_WRITE;
if (ftruncate(env->me_fd, env->me_mapsize) < 0)
return ErrCode();
}
env->me_map = mmap(addr, env->me_mapsize, prot, MAP_SHARED,
env->me_fd, 0);
if (env->me_map == MAP_FAILED) {
env->me_map = NULL;
return ErrCode();
}
if (flags & MDB_NORDAHEAD) {
// Turn off readahead. It's harmful when the DB is larger than RAM.
#ifdef MADV_RANDOM
madvise(env->me_map, env->me_mapsize, MADV_RANDOM);
#else
#ifdef POSIX_MADV_RANDOM
posix_madvise(env->me_map, env->me_mapsize, POSIX_MADV_RANDOM);
#endif // POSIX_MADV_RANDOM
#endif // MADV_RANDOM
}
#endif // _WIN32
// Can happen because the address argument to mmap() is just a
// hint. mmap() can pick another, e.g. if the range is in use.
// The MAP_FIXED flag would prevent that, but then mmap could
// instead unmap existing pages to make room for the new map.
if (addr && env->me_map != addr)
return EBUSY; // TODO: Make a new MDB_* error code?
p = (MDB_page *)env->me_map;
env->me_metas[0] = METADATA(p);
env->me_metas[1] = (MDB_meta *)((char *)env->me_metas[0] + env->me_psize);
return MDB_SUCCESS;
*/
return nil
}
func (db *db) setMapSize(size int) error {
/* /*
// If env is already open, caller is responsible for making // If env is already open, caller is responsible for making
// sure there are no active txns. // sure there are no active txns.
@ -573,7 +423,7 @@ func (db *db) setMapSize(size int) error {
return nil return nil
} }
func (db *db) setMaxBucketCount(count int) error { func (db *DB) setMaxBucketCount(count int) error {
/* /*
if (env->me_map) if (env->me_map)
return EINVAL; return EINVAL;
@ -583,16 +433,17 @@ func (db *db) setMaxBucketCount(count int) error {
return nil return nil
} }
func (db *db) setMaxReaderCount(count int) error { func (db *DB) setMaxReaderCount(count int) error {
/* /*
if (env->me_map || readers < 1) if (env->me_map || readers < 1)
return EINVAL; return EINVAL;
env->me_maxreaders = readers; env->me_maxreaders = readers;
return MDB_SUCCESS; return MDB_SUCCESS;
*/ */
return nil
} }
func (db *db) getMaxReaderCount(count int) (int, error) { func (db *DB) getMaxReaderCount(count int) (int, error) {
/* /*
if (!env || !readers) if (!env || !readers)
return EINVAL; return EINVAL;
@ -602,9 +453,8 @@ func (db *db) getMaxReaderCount(count int) (int, error) {
return 0, nil return 0, nil
} }
// Destroy resources from mdb_env_open(), clear our readers & DBIs // Destroy resources from mdb_env_open(), clear our readers & DBIs
func (db *db) close0(excl) { func (db *DB) close0(excl int) {
/* /*
int i; int i;
@ -624,7 +474,7 @@ func (db *db) close0(excl) {
if (env->me_flags & MDB_ENV_TXKEY) { if (env->me_flags & MDB_ENV_TXKEY) {
pthread_key_delete(env->me_txkey); pthread_key_delete(env->me_txkey);
#ifdef _WIN32 #ifdef _WIN32
// Delete our key from the global list // Delete our key from the global list
for (i=0; i<mdb_tls_nkeys; i++) for (i=0; i<mdb_tls_nkeys; i++)
if (mdb_tls_keys[i] == env->me_txkey) { if (mdb_tls_keys[i] == env->me_txkey) {
@ -632,7 +482,7 @@ func (db *db) close0(excl) {
mdb_tls_nkeys--; mdb_tls_nkeys--;
break; break;
} }
#endif #endif
} }
if (env->me_map) { if (env->me_map) {
@ -649,14 +499,14 @@ func (db *db) close0(excl) {
for (i = env->me_numreaders; --i >= 0; ) for (i = env->me_numreaders; --i >= 0; )
if (env->me_txns->mti_readers[i].mr_pid == pid) if (env->me_txns->mti_readers[i].mr_pid == pid)
env->me_txns->mti_readers[i].mr_pid = 0; env->me_txns->mti_readers[i].mr_pid = 0;
#ifdef _WIN32 #ifdef _WIN32
if (env->me_rmutex) { if (env->me_rmutex) {
CloseHandle(env->me_rmutex); CloseHandle(env->me_rmutex);
if (env->me_wmutex) CloseHandle(env->me_wmutex); if (env->me_wmutex) CloseHandle(env->me_wmutex);
} }
// Windows automatically destroys the mutexes when // Windows automatically destroys the mutexes when
// the last handle closes. // the last handle closes.
#elif defined(MDB_USE_POSIX_SEM) #elif defined(MDB_USE_POSIX_SEM)
if (env->me_rmutex != SEM_FAILED) { if (env->me_rmutex != SEM_FAILED) {
sem_close(env->me_rmutex); sem_close(env->me_rmutex);
if (env->me_wmutex != SEM_FAILED) if (env->me_wmutex != SEM_FAILED)
@ -670,17 +520,17 @@ func (db *db) close0(excl) {
sem_unlink(env->me_txns->mti_wmname); sem_unlink(env->me_txns->mti_wmname);
} }
} }
#endif #endif
munmap((void *)env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo)); munmap((void *)env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo));
} }
if (env->me_lfd != INVALID_HANDLE_VALUE) { if (env->me_lfd != INVALID_HANDLE_VALUE) {
#ifdef _WIN32 #ifdef _WIN32
if (excl >= 0) { if (excl >= 0) {
// Unlock the lockfile. Windows would have unlocked it // Unlock the lockfile. Windows would have unlocked it
// after closing anyway, but not necessarily at once. // after closing anyway, but not necessarily at once.
UnlockFile(env->me_lfd, 0, 0, 1, 0); UnlockFile(env->me_lfd, 0, 0, 1, 0);
} }
#endif #endif
(void) close(env->me_lfd); (void) close(env->me_lfd);
} }
@ -688,20 +538,20 @@ func (db *db) close0(excl) {
*/ */
} }
func (db *db) copyfd(handle int) error { func (db *DB) copyfd(handle int) error {
/* /*
MDB_txn *txn = NULL; MDB_txn *txn = NULL;
int rc; int rc;
size_t wsize; size_t wsize;
char *ptr; char *ptr;
#ifdef _WIN32 #ifdef _WIN32
DWORD len, w2; DWORD len, w2;
#define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL) #define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL)
#else #else
ssize_t len; ssize_t len;
size_t w2; size_t w2;
#define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0) #define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0)
#endif #endif
// Do the lock/unlock of the reader mutex before starting the // Do the lock/unlock of the reader mutex before starting the
// write txn. Otherwise other read txns could block writers. // write txn. Otherwise other read txns could block writers.
@ -769,14 +619,14 @@ func (db *db) copyfd(handle int) error {
} }
} }
leave: leave:
mdb_txn_abort(txn); mdb_txn_abort(txn);
return rc; return rc;
} }
int int
mdb_env_copy(MDB_env *env, const char *path) mdb_env_copy(MDB_env *env, const char *path)
{ {
int rc, len; int rc, len;
char *lpath; char *lpath;
HANDLE newfd = INVALID_HANDLE_VALUE; HANDLE newfd = INVALID_HANDLE_VALUE;
@ -795,33 +645,33 @@ mdb_env_copy(MDB_env *env, const char *path)
// The destination path must exist, but the destination file must not. // The destination path must exist, but the destination file must not.
// We don't want the OS to cache the writes, since the source data is // We don't want the OS to cache the writes, since the source data is
// already in the OS cache. // already in the OS cache.
#ifdef _WIN32 #ifdef _WIN32
newfd = CreateFile(lpath, GENERIC_WRITE, 0, NULL, CREATE_NEW, newfd = CreateFile(lpath, GENERIC_WRITE, 0, NULL, CREATE_NEW,
FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL); FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL);
#else #else
newfd = open(lpath, O_WRONLY|O_CREAT|O_EXCL, 0666); newfd = open(lpath, O_WRONLY|O_CREAT|O_EXCL, 0666);
#endif #endif
if (newfd == INVALID_HANDLE_VALUE) { if (newfd == INVALID_HANDLE_VALUE) {
rc = ErrCode(); rc = ErrCode();
goto leave; goto leave;
} }
#ifdef O_DIRECT #ifdef O_DIRECT
// Set O_DIRECT if the file system supports it // Set O_DIRECT if the file system supports it
if ((rc = fcntl(newfd, F_GETFL)) != -1) if ((rc = fcntl(newfd, F_GETFL)) != -1)
(void) fcntl(newfd, F_SETFL, rc | O_DIRECT); (void) fcntl(newfd, F_SETFL, rc | O_DIRECT);
#endif #endif
#ifdef F_NOCACHE // __APPLE__ #ifdef F_NOCACHE // __APPLE__
rc = fcntl(newfd, F_NOCACHE, 1); rc = fcntl(newfd, F_NOCACHE, 1);
if (rc) { if (rc) {
rc = ErrCode(); rc = ErrCode();
goto leave; goto leave;
} }
#endif #endif
rc = mdb_env_copyfd(env, newfd); rc = mdb_env_copyfd(env, newfd);
leave: leave:
if (!(env->me_flags & MDB_NOSUBDIR)) if (!(env->me_flags & MDB_NOSUBDIR))
free(lpath); free(lpath);
if (newfd != INVALID_HANDLE_VALUE) if (newfd != INVALID_HANDLE_VALUE)
@ -833,7 +683,7 @@ leave:
return nil return nil
} }
func (db *db) Close() { func (db *DB) Close() {
/* /*
MDB_page *dp; MDB_page *dp;
@ -862,7 +712,7 @@ func (db *db) Close() {
// @param[in] key The key for the node. // @param[in] key The key for the node.
// @param[in] data The data for the node. // @param[in] data The data for the node.
// @return The number of bytes needed to store the node. // @return The number of bytes needed to store the node.
func (db *db) LeafSize(key []byte, data []byte) int { func (db *DB) LeafSize(key []byte, data []byte) int {
/* /*
size_t sz; size_t sz;
@ -886,7 +736,7 @@ func (db *db) LeafSize(key []byte, data []byte) int {
// @param[in] env The environment handle. // @param[in] env The environment handle.
// @param[in] key The key for the node. // @param[in] key The key for the node.
// @return The number of bytes needed to store the node. // @return The number of bytes needed to store the node.
func (db *db) BranchSize(key []byte) int { func (db *DB) BranchSize(key []byte) int {
/* /*
size_t sz; size_t sz;
@ -902,7 +752,7 @@ func (db *db) BranchSize(key []byte) int {
return 0 return 0
} }
func (db *db) SetFlags(flag int, onoff bool) error { func (db *DB) SetFlags(flag int, onoff bool) error {
/* /*
if ((flag & CHANGEABLE) != flag) if ((flag & CHANGEABLE) != flag)
return EINVAL; return EINVAL;
@ -915,12 +765,7 @@ func (db *db) SetFlags(flag int, onoff bool) error {
return nil return nil
} }
func (db *db) Flags() int { func (db *DB) Stat() *Stat {
return db.flags
}
func (db *db) Stat() *Stat
/* /*
int toggle; int toggle;
@ -939,9 +784,10 @@ func (db *db) Stat() *Stat
//return mdb_stat0(env, &env->me_metas[toggle]->mm_dbs[MAIN_DBI], stat); //return mdb_stat0(env, &env->me_metas[toggle]->mm_dbs[MAIN_DBI], stat);
return stat return stat
*/ */
return nil
} }
func (db *db) Info() *Info { func (db *DB) Info() *Info {
/* /*
int toggle; int toggle;
@ -965,7 +811,7 @@ func (db *db) Info() *Info {
} }
// TODO: Move to bucket.go // TODO: Move to bucket.go
func (db *db) CloseBucket(b Bucket) { func (db *DB) CloseBucket(b Bucket) {
/* /*
char *ptr; char *ptr;
if (dbi <= MAIN_DBI || dbi >= env->me_maxdbs) if (dbi <= MAIN_DBI || dbi >= env->me_maxdbs)
@ -979,7 +825,7 @@ func (db *db) CloseBucket(b Bucket) {
} }
//int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx) //int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx)
func (db *db) getReaderList() error { func (db *DB) getReaderList() error {
/* /*
unsigned int i, rdrs; unsigned int i, rdrs;
MDB_reader *mr; MDB_reader *mr;
@ -1019,7 +865,7 @@ func (db *db) getReaderList() error {
} }
// (bool return is whether reader is dead) // (bool return is whether reader is dead)
func (db *db) checkReaders() (bool, error) { func (db *DB) checkReaders() (bool, error) {
/* /*
unsigned int i, j, rdrs; unsigned int i, j, rdrs;
MDB_reader *mr; MDB_reader *mr;

27
db_test.go Normal file
View File

@ -0,0 +1,27 @@
package bolt
import (
"io/ioutil"
"os"
"testing"
"github.com/stretchr/testify/assert"
)
func TestDBOpen(t *testing.T) {
withDB(func(db *DB, path string) {
err := db.Open(path, 0666)
assert.NoError(t, err)
})
}
// withDB runs fn with a new DB and a temporary path at which no file exists.
//
// The path is obtained by creating a temp file and removing it straight away,
// so fn receives a unique name that it can create itself. Whatever fn leaves
// at that path is removed when withDB returns.
func withDB(fn func(*DB, string)) {
	f, err := ioutil.TempFile("", "bolt-")
	if err != nil {
		// No *testing.T is available through this signature, so fail loudly
		// here rather than dereferencing a nil *os.File below.
		panic("withDB: cannot create temp file: " + err.Error())
	}
	path := f.Name()
	f.Close()
	os.Remove(path)
	defer os.RemoveAll(path)

	db := NewDB()
	fn(db, path)
}

View File

@ -1,30 +1,34 @@
package bolt package bolt
var ( var (
KeyExistError = &Error{"Key/data pair already exists"} KeyExistError = &Error{"key/data pair already exists", nil}
NotFoundError = &Error{"No matching key/data pair found"} NotFoundError = &Error{"no matching key/data pair found", nil}
PageNotFoundError = &Error{"Requested page not found"} PageNotFoundError = &Error{"requested page not found", nil}
CorruptedError = &Error{"Located page was wrong type"} CorruptedError = &Error{"located page was wrong type", nil}
PanicError = &Error{"Update of meta page failed"} PanicError = &Error{"update of meta page failed", nil}
VersionMismatchError = &Error{"Database environment version mismatch"} VersionMismatchError = &Error{"database environment version mismatch", nil}
InvalidError = &Error{"File is not an MDB file"} InvalidError = &Error{"file is not a bolt file", nil}
MapFullError = &Error{"Environment mapsize limit reached"} MapFullError = &Error{"environment mapsize limit reached", nil}
BucketFullError = &Error{"Environment maxdbs limit reached"} BucketFullError = &Error{"environment maxdbs limit reached", nil}
ReadersFullError = &Error{"Environment maxreaders limit reached"} ReadersFullError = &Error{"environment maxreaders limit reached", nil}
TransactionFullError = &Error{"Transaction has too many dirty pages - transaction too big"} TransactionFullError = &Error{"transaction has too many dirty pages - transaction too big", nil}
CursorFullError = &Error{"Internal error - cursor stack limit reached"} CursorFullError = &Error{"internal error - cursor stack limit reached", nil}
PageFullError = &Error{"Internal error - page has no more space"} PageFullError = &Error{"internal error - page has no more space", nil}
MapResizedError = &Error{"Database contents grew beyond environment mapsize"} MapResizedError = &Error{"database contents grew beyond environment mapsize", nil}
IncompatibleError = &Error{"Operation and DB incompatible, or DB flags changed"} IncompatibleError = &Error{"operation and db incompatible, or db flags changed", nil}
BadReaderSlotError = &Error{"Invalid reuse of reader locktable slot"} BadReaderSlotError = &Error{"invalid reuse of reader locktable slot", nil}
BadTransactionError = &Error{"Transaction cannot recover - it must be aborted"} BadTransactionError = &Error{"transaction cannot recover - it must be aborted", nil}
BadValueSizeError = &Error{"Too big key/data, key is empty, or wrong DUPFIXED size"} BadValueSizeError = &Error{"too big key/data or key is empty", nil}
) )
type Error struct { type Error struct {
message string message string
cause error
} }
func (e *Error) Error() { func (e *Error) Error() string {
if e.cause != nil {
return e.message + ": " + e.cause.Error()
}
return e.message return e.message
} }

17
meta.go
View File

@ -1,7 +1,7 @@
package bolt package bolt
var ( var (
InvalidMetaPageError = &Error{"Invalid meta page"} InvalidMetaPageError = &Error{"Invalid meta page", nil}
) )
// TODO: #define mm_psize mm_dbs[0].md_pad // TODO: #define mm_psize mm_dbs[0].md_pad
@ -25,16 +25,14 @@ var (
// void *md_relctx; /**< user-provided context for md_rel */ // void *md_relctx; /**< user-provided context for md_rel */
// } MDB_dbx; // } MDB_dbx;
const magic int32 = 0xBEEFC0DE const magic uint32 = 0xC0DEC0DE
const version uint32 = 1
type meta struct { type meta struct {
magic int32 magic uint32
version int32 version uint32
mapsize int free Bucket
free bucket main Bucket
main bucket
pgno int pgno int
txnid int txnid int
} }
@ -49,7 +47,6 @@ func (m *meta) validate() error {
return nil return nil
} }
// Read the environment parameters of a DB environment before // Read the environment parameters of a DB environment before
// mapping it into memory. // mapping it into memory.
// @param[in] env the environment handle // @param[in] env the environment handle

5
os.go Normal file
View File

@ -0,0 +1,5 @@
package bolt
import (
_ "os"
)

44
page.go
View File

@ -5,9 +5,11 @@ import (
) )
const maxPageSize = 0x8000 const maxPageSize = 0x8000
const minKeyCount = 2
var _page page var _page page
const headerSize = unsafe.Offsetof(_page.ptr)
const pageHeaderSize = int(unsafe.Offsetof(_page.ptr))
const minPageKeys = 2 const minPageKeys = 2
const fillThreshold = 250 // 25% const fillThreshold = 250 // 25%
@ -20,13 +22,15 @@ const (
p_dirty = 0x10 /**< dirty page, also set for #P_SUBP pages */ p_dirty = 0x10 /**< dirty page, also set for #P_SUBP pages */
p_sub = 0x40 p_sub = 0x40
p_keep = 0x8000 /**< leave this page alone during spill */ p_keep = 0x8000 /**< leave this page alone during spill */
p_invalid = ^pgno(0)
) )
// maxCommitPages is the maximum number of pages to commit in one writev() call. // maxCommitPages is the maximum number of pages to commit in one writev() call.
const maxCommitPages 64 const maxCommitPages = 64
/* max bytes to write in one call */ /* max bytes to write in one call */
const maxWriteByteCount 0x80000000U // TODO: #define MAX_WRITE 0x80000000U >> (sizeof(ssize_t) == 4)) const maxWriteByteCount uint = 0x80000000 // TODO: #define MAX_WRITE 0x80000000U >> (sizeof(ssize_t) == 4))
// TODO: // TODO:
// #if defined(IOV_MAX) && IOV_MAX < MDB_COMMIT_PAGES // #if defined(IOV_MAX) && IOV_MAX < MDB_COMMIT_PAGES
@ -42,12 +46,14 @@ const maxWriteByteCount 0x80000000U // TODO: #define MAX_WRITE 0x80000000U >>
// TODO: #define MDB_SPLIT_REPLACE MDB_APPENDDUP /**< newkey is not new */ // TODO: #define MDB_SPLIT_REPLACE MDB_APPENDDUP /**< newkey is not new */
type pgno uint64 type pgno uint64
type txnid uint64
type indx uint16
type page struct { type page struct {
id pgno id pgno
flags int flags int
lower int lower indx
upper int upper indx
overflow int overflow int
ptr int ptr int
} }
@ -60,8 +66,8 @@ type pageState struct {
// meta returns a pointer to the metadata section of the page. // meta returns a pointer to the metadata section of the page.
func (p *page) meta() (*meta, error) { func (p *page) meta() (*meta, error) {
// Exit if page is not a meta page. // Exit if page is not a meta page.
if (p.flags & p_meta) != 0 { if (p.flags & p_meta) == 0 {
return InvalidMetaPageError return nil, InvalidMetaPageError
} }
// Cast the meta section and validate before returning. // Cast the meta section and validate before returning.
@ -72,12 +78,17 @@ func (p *page) meta() (*meta, error) {
return m, nil return m, nil
} }
// initMeta turns p into a freshly initialized meta page.
//
// The page flags are overwritten with p_meta, and the meta record located at
// p.ptr is stamped with the package magic and version. The page size is kept
// in the pad field of the free bucket, pgno is set to 1, and both the free
// and main buckets start with an invalid root.
func (p *page) initMeta(pageSize int) {
	p.flags = p_meta

	// The meta record is overlaid on the page starting at the ptr field.
	hdr := (*meta)(unsafe.Pointer(&p.ptr))

	// Identification.
	hdr.magic = magic
	hdr.version = version

	// Neither bucket has a root page yet.
	hdr.free.root = p_invalid
	hdr.main.root = p_invalid

	hdr.free.pad = uint32(pageSize)
	hdr.pgno = 1
}
// nodeCount returns the number of nodes on the page. // nodeCount returns the number of nodes on the page.
func (p *page) nodeCount() int { func (p *page) nodeCount() int {
@ -86,10 +97,5 @@ func (p *page) nodeCount() int {
// remainingSize returns the number of bytes left in the page. // remainingSize returns the number of bytes left in the page.
func (p *page) remainingSize() int { func (p *page) remainingSize() int {
return p.header.upper - p.header.lower return int(p.upper - p.lower)
}
// remainingSize returns the number of bytes left in the page.
func (p *page) remainingSize() int {
return p.header.upper - p.header.lower
} }

View File

@ -1,5 +1,5 @@
package bolt package bolt
type reader struct { type reader struct {
int transactionID txnid int
} }

View File

@ -16,7 +16,7 @@ type Transaction interface {
type transaction struct { type transaction struct {
id int id int
flags int flags int
db *db db *DB
parent *transaction parent *transaction
child *transaction child *transaction
nextPageNumber int nextPageNumber int
@ -25,7 +25,7 @@ type transaction struct {
dirtyList []int dirtyList []int
reader *reader reader *reader
// TODO: bucketxs []*bucketx // TODO: bucketxs []*bucketx
buckets []*bucket buckets []*Bucket
bucketFlags []int bucketFlags []int
cursors []*cursor cursors []*cursor
// Implicit from slices? TODO: MDB_dbi mt_numdbs; // Implicit from slices? TODO: MDB_dbi mt_numdbs;
@ -38,7 +38,6 @@ type ntxn struct {
pageState pageState /**< parent transaction's saved freestate */ pageState pageState /**< parent transaction's saved freestate */
} }
func (t *transaction) allocPage(num int) *page { func (t *transaction) allocPage(num int) *page {
/* /*
MDB_env *env = txn->mt_env; MDB_env *env = txn->mt_env;
@ -327,10 +326,10 @@ func (t *transaction) renew() error {
txn->mt_txnid = meta->mm_txnid; txn->mt_txnid = meta->mm_txnid;
} }
txn->mt_txnid++; txn->mt_txnid++;
#if MDB_DEBUG #if MDB_DEBUG
if (txn->mt_txnid == mdb_debug_start) if (txn->mt_txnid == mdb_debug_start)
mdb_debug = 1; mdb_debug = 1;
#endif #endif
txn->mt_dirty_room = MDB_IDL_UM_MAX; txn->mt_dirty_room = MDB_IDL_UM_MAX;
txn->mt_u.dirty_list = env->me_dirty_list; txn->mt_u.dirty_list = env->me_dirty_list;
txn->mt_u.dirty_list[0].mid = 0; txn->mt_u.dirty_list[0].mid = 0;
@ -390,7 +389,7 @@ func (t *transaction) Renew() error {
return nil return nil
} }
func (t *transaction) DB() DB { func (t *transaction) DB() *DB {
return t.db return t.db
} }
@ -573,7 +572,7 @@ func (t *transaction) saveFreelist() error {
} while (freecnt < free_pgs[0]); } while (freecnt < free_pgs[0]);
mdb_midl_sort(free_pgs); mdb_midl_sort(free_pgs);
memcpy(data.mv_data, free_pgs, data.mv_size); memcpy(data.mv_data, free_pgs, data.mv_size);
#if (MDB_DEBUG) > 1 #if (MDB_DEBUG) > 1
{ {
unsigned int i = free_pgs[0]; unsigned int i = free_pgs[0];
DPRINTF(("IDL write txn %"Z"u root %"Z"u num %u", DPRINTF(("IDL write txn %"Z"u root %"Z"u num %u",
@ -581,7 +580,7 @@ func (t *transaction) saveFreelist() error {
for (; i; i--) for (; i; i--)
DPRINTF(("IDL %"Z"u", free_pgs[i])); DPRINTF(("IDL %"Z"u", free_pgs[i]));
} }
#endif #endif
continue; continue;
} }
@ -663,7 +662,7 @@ func (t *transaction) saveFreelist() error {
// @param[in] txn the transaction that's being committed // @param[in] txn the transaction that's being committed
// @param[in] keep number of initial pages in dirty_list to keep dirty. // @param[in] keep number of initial pages in dirty_list to keep dirty.
// @return 0 on success, non-zero on failure. // @return 0 on success, non-zero on failure.
func (t *transaction) flush(keep bool) { func (t *transaction) flush(keep bool) error {
/* /*
MDB_env *env = txn->mt_env; MDB_env *env = txn->mt_env;
MDB_ID2L dl = txn->mt_u.dirty_list; MDB_ID2L dl = txn->mt_u.dirty_list;
@ -672,14 +671,14 @@ func (t *transaction) flush(keep bool) {
size_t size = 0, pos = 0; size_t size = 0, pos = 0;
pgno_t pgno = 0; pgno_t pgno = 0;
MDB_page *dp = NULL; MDB_page *dp = NULL;
#ifdef _WIN32 #ifdef _WIN32
OVERLAPPED ov; OVERLAPPED ov;
#else #else
struct iovec iov[MDB_COMMIT_PAGES]; struct iovec iov[MDB_COMMIT_PAGES];
ssize_t wpos = 0, wsize = 0, wres; ssize_t wpos = 0, wsize = 0, wres;
size_t next_pos = 1; // impossible pos, so pos != next_pos size_t next_pos = 1; // impossible pos, so pos != next_pos
int n = 0; int n = 0;
#endif #endif
j = i = keep; j = i = keep;
@ -715,7 +714,7 @@ func (t *transaction) flush(keep bool) {
size = psize; size = psize;
if (IS_OVERFLOW(dp)) size *= dp->mp_pages; if (IS_OVERFLOW(dp)) size *= dp->mp_pages;
} }
#ifdef _WIN32 #ifdef _WIN32
else break; else break;
// Windows actually supports scatter/gather I/O, but only on // Windows actually supports scatter/gather I/O, but only on
@ -733,14 +732,14 @@ func (t *transaction) flush(keep bool) {
DPRINTF(("WriteFile: %d", rc)); DPRINTF(("WriteFile: %d", rc));
return rc; return rc;
} }
#else #else
// Write up to MDB_COMMIT_PAGES dirty pages at a time. // Write up to MDB_COMMIT_PAGES dirty pages at a time.
if (pos!=next_pos || n==MDB_COMMIT_PAGES || wsize+size>MAX_WRITE) { if (pos!=next_pos || n==MDB_COMMIT_PAGES || wsize+size>MAX_WRITE) {
if (n) { if (n) {
// Write previous page(s) // Write previous page(s)
#ifdef MDB_USE_PWRITEV #ifdef MDB_USE_PWRITEV
wres = pwritev(env->me_fd, iov, n, wpos); wres = pwritev(env->me_fd, iov, n, wpos);
#else #else
if (n == 1) { if (n == 1) {
wres = pwrite(env->me_fd, iov[0].iov_base, wsize, wpos); wres = pwrite(env->me_fd, iov[0].iov_base, wsize, wpos);
} else { } else {
@ -751,7 +750,7 @@ func (t *transaction) flush(keep bool) {
} }
wres = writev(env->me_fd, iov, n); wres = writev(env->me_fd, iov, n);
} }
#endif #endif
if (wres != wsize) { if (wres != wsize) {
if (wres < 0) { if (wres < 0) {
rc = ErrCode(); rc = ErrCode();
@ -775,7 +774,7 @@ func (t *transaction) flush(keep bool) {
iov[n].iov_base = (char *)dp; iov[n].iov_base = (char *)dp;
wsize += size; wsize += size;
n++; n++;
#endif // _WIN32 #endif // _WIN32
} }
for (i = keep; ++i <= pagecount; ) { for (i = keep; ++i <= pagecount; ) {
@ -789,16 +788,16 @@ func (t *transaction) flush(keep bool) {
mdb_dpage_free(env, dp); mdb_dpage_free(env, dp);
} }
done: done:
i--; i--;
txn->mt_dirty_room += i - j; txn->mt_dirty_room += i - j;
dl[0].mid = j; dl[0].mid = j;
return MDB_SUCCESS; return MDB_SUCCESS;
} }
int int
mdb_txn_commit(MDB_txn *txn) mdb_txn_commit(MDB_txn *txn)
{ {
int rc; int rc;
unsigned int i; unsigned int i;
MDB_env *env; MDB_env *env;
@ -976,16 +975,16 @@ mdb_txn_commit(MDB_txn *txn)
if (mdb_midl_shrink(&txn->mt_free_pgs)) if (mdb_midl_shrink(&txn->mt_free_pgs))
env->me_free_pgs = txn->mt_free_pgs; env->me_free_pgs = txn->mt_free_pgs;
#if (MDB_DEBUG) > 2 #if (MDB_DEBUG) > 2
mdb_audit(txn); mdb_audit(txn);
#endif #endif
if ((rc = mdb_page_flush(txn, 0)) || if ((rc = mdb_page_flush(txn, 0)) ||
(rc = mdb_env_sync(env, 0)) || (rc = mdb_env_sync(env, 0)) ||
(rc = mdb_env_write_meta(txn))) (rc = mdb_env_write_meta(txn)))
goto fail; goto fail;
done: done:
env->me_pglast = 0; env->me_pglast = 0;
env->me_txn = NULL; env->me_txn = NULL;
mdb_dbis_update(txn, 1); mdb_dbis_update(txn, 1);
@ -996,7 +995,7 @@ done:
return MDB_SUCCESS; return MDB_SUCCESS;
fail: fail:
mdb_txn_abort(txn); mdb_txn_abort(txn);
return rc; return rc;
*/ */
@ -1014,11 +1013,11 @@ func (t *transaction) writeMeta() error {
int rc, len, toggle; int rc, len, toggle;
char *ptr; char *ptr;
HANDLE mfd; HANDLE mfd;
#ifdef _WIN32 #ifdef _WIN32
OVERLAPPED ov; OVERLAPPED ov;
#else #else
int r2; int r2;
#endif #endif
toggle = txn->mt_txnid & 1; toggle = txn->mt_txnid & 1;
DPRINTF(("writing meta page %d for root page %"Z"u", DPRINTF(("writing meta page %d for root page %"Z"u",
@ -1040,11 +1039,11 @@ func (t *transaction) writeMeta() error {
rc = (env->me_flags & MDB_MAPASYNC) ? MS_ASYNC : MS_SYNC; rc = (env->me_flags & MDB_MAPASYNC) ? MS_ASYNC : MS_SYNC;
ptr = env->me_map; ptr = env->me_map;
if (toggle) { if (toggle) {
#ifndef _WIN32 // POSIX msync() requires ptr = start of OS page #ifndef _WIN32 // POSIX msync() requires ptr = start of OS page
if (meta_size < env->me_os_psize) if (meta_size < env->me_os_psize)
meta_size += meta_size; meta_size += meta_size;
else else
#endif #endif
ptr += meta_size; ptr += meta_size;
} }
if (MDB_MSYNC(ptr, meta_size, rc)) { if (MDB_MSYNC(ptr, meta_size, rc)) {
@ -1080,16 +1079,16 @@ func (t *transaction) writeMeta() error {
// Write to the SYNC fd // Write to the SYNC fd
mfd = env->me_flags & (MDB_NOSYNC|MDB_NOMETASYNC) ? mfd = env->me_flags & (MDB_NOSYNC|MDB_NOMETASYNC) ?
env->me_fd : env->me_mfd; env->me_fd : env->me_mfd;
#ifdef _WIN32 #ifdef _WIN32
{ {
memset(&ov, 0, sizeof(ov)); memset(&ov, 0, sizeof(ov));
ov.Offset = off; ov.Offset = off;
if (!WriteFile(mfd, ptr, len, (DWORD *)&rc, &ov)) if (!WriteFile(mfd, ptr, len, (DWORD *)&rc, &ov))
rc = -1; rc = -1;
} }
#else #else
rc = pwrite(mfd, ptr, len, off); rc = pwrite(mfd, ptr, len, off);
#endif #endif
if (rc != len) { if (rc != len) {
rc = rc < 0 ? ErrCode() : EIO; rc = rc < 0 ? ErrCode() : EIO;
DPUTS("write failed, disk error?"); DPUTS("write failed, disk error?");
@ -1098,19 +1097,19 @@ func (t *transaction) writeMeta() error {
// Use the non-SYNC fd; we know it will fail anyway. // Use the non-SYNC fd; we know it will fail anyway.
meta.mm_last_pg = metab.mm_last_pg; meta.mm_last_pg = metab.mm_last_pg;
meta.mm_txnid = metab.mm_txnid; meta.mm_txnid = metab.mm_txnid;
#ifdef _WIN32 #ifdef _WIN32
memset(&ov, 0, sizeof(ov)); memset(&ov, 0, sizeof(ov));
ov.Offset = off; ov.Offset = off;
WriteFile(env->me_fd, ptr, len, NULL, &ov); WriteFile(env->me_fd, ptr, len, NULL, &ov);
#else #else
r2 = pwrite(env->me_fd, ptr, len, off); r2 = pwrite(env->me_fd, ptr, len, off);
(void)r2; // Silence warnings. We don't care about pwrite's return value (void)r2; // Silence warnings. We don't care about pwrite's return value
#endif #endif
fail: fail:
env->me_flags |= MDB_FATAL_ERROR; env->me_flags |= MDB_FATAL_ERROR;
return rc; return rc;
} }
done: done:
// Memory ordering issues are irrelevant; since the entire writer // Memory ordering issues are irrelevant; since the entire writer
// is wrapped by wmutex, all of these changes will become visible // is wrapped by wmutex, all of these changes will become visible
// after the wmutex is unlocked. Since the DB is multi-version, // after the wmutex is unlocked. Since the DB is multi-version,
@ -1174,7 +1173,7 @@ func (t *transaction) getPage(id int) (*page, int, error) {
return MDB_PAGE_NOTFOUND; return MDB_PAGE_NOTFOUND;
} }
done: done:
*ret = p; *ret = p;
if (lvl) if (lvl)
*lvl = level; *lvl = level;
@ -1272,10 +1271,10 @@ func (t *transaction) Cursor(b Bucket) (Cursor, error) {
return MDB_SUCCESS; return MDB_SUCCESS;
*/ */
return nil return nil, nil
} }
func (t *transaction) Renew(c Cursor) error { func (t *transaction) Renew1(c Cursor) error {
/* /*
if (txn == NULL || mc == NULL || mc->mc_dbi >= txn->mt_numdbs) if (txn == NULL || mc == NULL || mc->mc_dbi >= txn->mt_numdbs)
return EINVAL; return EINVAL;
@ -1286,9 +1285,10 @@ func (t *transaction) Renew(c Cursor) error {
mdb_cursor_init(mc, txn, mc->mc_dbi, mc->mc_xcursor); mdb_cursor_init(mc, txn, mc->mc_dbi, mc->mc_xcursor);
return MDB_SUCCESS; return MDB_SUCCESS;
*/ */
return nil
} }
func (t *transaction) Delete(b *bucket, key []byte, data []byte) error { func (t *transaction) Delete(b *Bucket, key []byte, data []byte) error {
/* /*
MDB_cursor mc; MDB_cursor mc;
MDB_xcursor mx; MDB_xcursor mx;
@ -1360,9 +1360,10 @@ func (t *transaction) Put(b Bucket, key []byte, data []byte, flags int) error {
mdb_cursor_init(&mc, txn, dbi, &mx); mdb_cursor_init(&mc, txn, dbi, &mx);
return mdb_cursor_put(&mc, key, data, flags); return mdb_cursor_put(&mc, key, data, flags);
*/ */
return nil
} }
func (t *transaction) Bucket(name string, flags int) (Bucket, error) { func (t *transaction) Bucket(name string, flags int) (*Bucket, error) {
/* /*
MDB_val key, data; MDB_val key, data;
MDB_dbi i; MDB_dbi i;
@ -1467,16 +1468,19 @@ func (t *transaction) Bucket(name string, flags int) (Bucket, error) {
} }
func (t *transaction) Stat(b Bucket) *Stat { func (t *transaction) Stat(b Bucket) *Stat {
/*
if (txn == NULL || arg == NULL || dbi >= txn->mt_numdbs) if (txn == NULL || arg == NULL || dbi >= txn->mt_numdbs)
return EINVAL; return EINVAL;
if (txn->mt_dbflags[dbi] & DB_STALE) { if (txn->mt_dbflags[dbi] & DB_STALE) {
MDB_cursor mc; MDB_cursor mc;
MDB_xcursor mx; MDB_xcursor mx;
/* Stale, must read the DB's root. cursor_init does it for us. */ // Stale, must read the DB's root. cursor_init does it for us.
mdb_cursor_init(&mc, txn, dbi, &mx); mdb_cursor_init(&mc, txn, dbi, &mx);
} }
return mdb_stat0(txn->mt_env, &txn->mt_dbs[dbi], arg); return mdb_stat0(txn->mt_env, &txn->mt_dbs[dbi], arg);
*/
return nil
} }
func (t *transaction) BucketFlags(b Bucket) (int, error) { func (t *transaction) BucketFlags(b Bucket) (int, error) {
@ -1490,7 +1494,7 @@ func (t *transaction) BucketFlags(b Bucket) (int, error) {
return 0, nil return 0, nil
} }
func (t *transaction) Drop(b Bucket int del) error { func (t *transaction) Drop(b *Bucket, del int) error {
/* /*
MDB_cursor *mc, *m2; MDB_cursor *mc, *m2;
int rc; int rc;
@ -1531,7 +1535,7 @@ func (t *transaction) Drop(b Bucket int del) error {
txn->mt_flags |= MDB_TXN_DIRTY; txn->mt_flags |= MDB_TXN_DIRTY;
} }
leave: leave:
mdb_cursor_close(mc); mdb_cursor_close(mc);
return rc; return rc;
*/ */