glibc/db2/btree/bt_search.c
Ulrich Drepper bf7997b65c Update.
1998-06-09  Ulrich Drepper  <drepper@cygnus.com>

	* sysdeps/unix/sysv/linux/netinet/ip.h (struct ip_options): Define
	__data member only for gcc.  Reported by ak@muc.de.

	* misc/mntent.h: Undo last patch.
	* sysdeps/unix/sysv/linux/fstatvfs.c (fstatvfs): Undo last patch.
	* misc/tst/mntent.c: Adjust code for this change.

	* io/fts.c: Updated from a slightly more recent BSD version.
	* io/fts.h: Likewise.

	* libc.map: Add __libc_stack_end.

	* db2/Makefile (routines): Add lock_region.
	* db2/config.h: Update from db-2.4.14.
	* db2/db.h: Likewise.
	* db2/db_185.h: Likewise.
	* db2/db_int.h: Likewise.
	* db2/bt_close.c: Likewise.
	* db2/bt_compare.c: Likewise.
	* db2/bt_conv.c: Likewise.
	* db2/bt_cursor.c: Likewise.
	* db2/bt_delete.c: Likewise.
	* db2/bt_open.c: Likewise.
	* db2/bt_page.c: Likewise.
	* db2/bt_put.c: Likewise.
	* db2/bt_rec.c: Likewise.
	* db2/bt_recno.c: Likewise.
	* db2/bt_rsearch.c: Likewise.
	* db2/bt_search.c: Likewise.
	* db2/bt_split.c: Likewise.
	* db2/bt_stat.c: Likewise.
	* db2/btree.src: Likewise.
	* db2/btree_auto.c: Likewise.
	* db2/getlong.c: Likewise.
	* db2/db_appinit.c: Likewise.
	* db2/db_apprec.c: Likewise.
	* db2/db_byteorder.c: Likewise.
	* db2/db_err.c: Likewise.
	* db2/db_log2.c: Likewise.
	* db2/db_region.c: Likewise.
	* db2/db_salloc.c: Likewise.
	* db2/db_shash.c: Likewise.
	* db2/db.c: Likewise.
	* db2/db.src: Likewise.
	* db2/db_auto.c: Likewise.
	* db2/db_conv.c: Likewise.
	* db2/db_dispatch.c: Likewise.
	* db2/db_dup.c: Likewise.
	* db2/db_overflow.c: Likewise.
	* db2/db_pr.c: Likewise.
	* db2/db_rec.c: Likewise.
	* db2/db_ret.c: Likewise.
	* db2/db_thread.c: Likewise.
	* db2/db185.c: Likewise.
	* db2/db185_int.h: Likewise.
	* db2/dbm.c: Likewise.
	* db2/hash.c: Likewise.
	* db2/hash.src: Likewise.
	* db2/hash_auto.c: Likewise.
	* db2/hash_conv.c: Likewise.
	* db2/hash_debug.c: Likewise.
	* db2/hash_dup.c: Likewise.
	* db2/hash_func.c: Likewise.
	* db2/hash_page.c: Likewise.
	* db2/hash_rec.c: Likewise.
	* db2/hash_stat.c: Likewise.
	* db2/btree.h: Likewise.
	* db2/btree_ext.h: Likewise.
	* db2/clib_ext.h: Likewise.
	* db2/common_ext.h: Likewise.
	* db2/cxx_int.h: Likewise.
	* db2/db.h.src: Likewise.
	* db2/db_185.h.src: Likewise.
	* db2/db_am.h: Likewise.
	* db2/db_auto.h: Likewise.
	* db2/db_cxx.h: Likewise.
	* db2/db_dispatch.h: Likewise.
	* db2/db_ext.h: Likewise.
	* db2/db_int.h.src: Likewise.
	* db2/db_page.h: Likewise.
	* db2/db_shash.h: Likewise.
	* db2/db_swap.h: Likewise.
	* db2/hash.h: Likewise.
	* db2/hash_ext.h: Likewise.
	* db2/lock.h: Likewise.
	* db2/lock_ext.h: Likewise.
	* db2/log.h: Likewise.
	* db2/log_ext.h: Likewise.
	* db2/mp.h: Likewise.
	* db2/mp_ext.h: Likewise.
	* db2/mutex_ext.h: Likewise.
	* db2/os_ext.h: Likewise.
	* db2/os_func.h: Likewise.
	* db2/queue.h: Likewise.
	* db2/shqueue.h: Likewise.
	* db2/txn.h: Likewise.
	* db2/lock.c: Likewise.
	* db2/lock_conflict.c: Likewise.
	* db2/lock_deadlock.c: Likewise.
	* db2/lock_region.c: Likewise.
	* db2/lock_util.c: Likewise.
	* db2/log.c: Likewise.
	* db2/log.src: Likewise.
	* db2/log_archive.c: Likewise.
	* db2/log_auto.c: Likewise.
	* db2/log_compare.c: Likewise.
	* db2/log_findckp.c: Likewise.
	* db2/log_get.c: Likewise.
	* db2/log_put.c: Likewise.
	* db2/log_rec.c: Likewise.
	* db2/log_register.c: Likewise.
	* db2/mp_bh.c: Likewise.
	* db2/mp_fget.c: Likewise.
	* db2/mp_fopen.c: Likewise.
	* db2/mp_fput.c: Likewise.
	* db2/mp_fset.c: Likewise.
	* db2/mp_open.c: Likewise.
	* db2/mp_pr.c: Likewise.
	* db2/mp_region.c: Likewise.
	* db2/mp_sync.c: Likewise.
	* db2/68020.gcc: Likewise.
	* db2/mutex.c: Likewise.
	* db2/parisc.gcc: Likewise.
	* db2/parisc.hp: Likewise.
	* db2/sco.cc: Likewise.
	* db2/os_abs.c: Likewise.
	* db2/os_alloc.c: Likewise.
	* db2/os_config.c: Likewise.
	* db2/os_dir.c: Likewise.
	* db2/os_fid.c: Likewise.
	* db2/os_fsync.c: Likewise.
	* db2/os_map.c: Likewise.
	* db2/os_oflags.c: Likewise.
	* db2/os_open.c: Likewise.
	* db2/os_rpath.c: Likewise.
	* db2/os_rw.c: Likewise.
	* db2/os_seek.c: Likewise.
	* db2/os_sleep.c: Likewise.
	* db2/os_spin.c: Likewise.
	* db2/os_stat.c: Likewise.
	* db2/os_unlink.c: Likewise.
	* db2/db_archive.c: Likewise.
	* db2/db_checkpoint.c: Likewise.
	* db2/db_deadlock.c: Likewise.
	* db2/db_dump.c: Likewise.
	* db2/db_dump185.c: Likewise.
	* db2/db_load.c: Likewise.
	* db2/db_printlog.c: Likewise.
	* db2/db_recover.c: Likewise.
	* db2/db_stat.c: Likewise.
	* db2/txn.c: Likewise.
	* db2/txn.src: Likewise.
	* db2/txn_auto.c: Likewise.
	* db2/txn_rec.c: Likewise.

	* elf/rtld.c: Move definition of __libc_stack_end to ...
	* sysdeps/generic/dl-sysdep.h: ...here.

	* sysdeps/unix/sysv/linux/fstatvfs.c: Handle nodiratime option.
	* sysdeps/unix/sysv/linux/bits/statvfs.h: Define ST_NODIRATIME.
	* sysdeps/unix/sysv/linux/sys/mount.h: Define MS_NODIRATIME.

1998-06-08 21:44  Ulrich Drepper  <drepper@cygnus.com>

	* sysdeps/unix/sysv/linux/fstatvfs.c: Handle constant option string
	from mntent correctly.

1998-06-06  Andreas Jaeger  <aj@arthur.rhein-neckar.de>

	* sunrpc/Makefile (generated): Correct typo.

1998-06-04  Philip Blundell  <philb@gnu.org>

	* elf/elf.h (EM_ARM, et al.): New definitions.
	* sysdeps/arm/dl-machine.h: Update for new draft ARM ELF ABI.
1998-06-09 15:16:55 +00:00

352 lines
9.8 KiB
C

/*-
* See the file LICENSE for redistribution information.
*
* Copyright (c) 1996, 1997, 1998
* Sleepycat Software. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995, 1996
* Keith Bostic. All rights reserved.
*/
/*
* Copyright (c) 1990, 1993, 1994, 1995
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Mike Olson.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#ifndef lint
static const char sccsid[] = "@(#)bt_search.c 10.15 (Sleepycat) 5/6/98";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
#include <errno.h>
#include <string.h>
#endif
#include "db_int.h"
#include "db_page.h"
#include "btree.h"
/*
* __bam_search --
* Search a btree for a key.
*
* PUBLIC: int __bam_search __P((DB *,
* PUBLIC: const DBT *, u_int32_t, int, db_recno_t *, int *));
*/
int
__bam_search(dbp, key, flags, stop, recnop, exactp)
DB *dbp;
const DBT *key;
u_int32_t flags;
int stop, *exactp;
db_recno_t *recnop;
{
BTREE *t;
DB_LOCK lock;
EPG cur;
PAGE *h;
db_indx_t base, i, indx, lim;
db_pgno_t pg;
db_recno_t recno;
int cmp, jump, ret, stack;
t = dbp->internal;
recno = 0;
BT_STK_CLR(t);
/*
* There are several ways we search a btree tree. The flags argument
* specifies if we're acquiring read or write locks, if we position
* to the first or last item in a set of duplicates, if we return
* deleted items, and if we are locking pairs of pages. See btree.h
* for more details. In addition, if we're doing record numbers, we
* have to lock the entire tree regardless.
*
* If write-locking pages, we need to know whether or not to acquire a
* write lock on a page before getting it. This depends on how deep it
* is in tree, which we don't know until we acquire the root page. So,
* if we need to lock the root page we may have to upgrade it later,
* because we won't get the correct lock initially.
*
* Retrieve the root page.
*/
pg = PGNO_ROOT;
stack = F_ISSET(dbp, DB_BT_RECNUM) && LF_ISSET(S_STACK);
if ((ret = __bam_lget(dbp,
0, pg, stack ? DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
return (ret);
if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0) {
(void)__BT_LPUT(dbp, lock);
return (ret);
}
/*
* Decide if we need to save this page; if we do, write lock it.
* We deliberately don't lock-couple on this call. If the tree
* is tiny, i.e., one page, and two threads are busily updating
* the root page, we're almost guaranteed deadlocks galore, as
* each one gets a read lock and then blocks the other's attempt
* for a write lock.
*/
if (!stack &&
((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) ||
(LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) {
(void)memp_fput(dbp->mpf, h, 0);
(void)__BT_LPUT(dbp, lock);
if ((ret = __bam_lget(dbp, 0, pg, DB_LOCK_WRITE, &lock)) != 0)
return (ret);
if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0) {
(void)__BT_LPUT(dbp, lock);
return (ret);
}
stack = 1;
}
for (;;) {
/*
* Do a binary search on the current page. If we're searching
* a leaf page, we have to manipulate the indices in groups of
* two. If we're searching an internal page, they're an index
* per page item. If we find an exact match on a leaf page,
* we're done.
*/
cur.page = h;
jump = TYPE(h) == P_LBTREE ? P_INDX : O_INDX;
for (base = 0,
lim = NUM_ENT(h) / (db_indx_t)jump; lim != 0; lim >>= 1) {
cur.indx = indx = base + ((lim >> 1) * jump);
if ((cmp = __bam_cmp(dbp, key, &cur)) == 0) {
if (TYPE(h) == P_LBTREE)
goto match;
goto next;
}
if (cmp > 0) {
base = indx + jump;
--lim;
}
}
/*
* No match found. Base is the smallest index greater than
* key and may be zero or a last + O_INDX index.
*
* If it's a leaf page, return base as the "found" value.
* Delete only deletes exact matches.
*/
if (TYPE(h) == P_LBTREE) {
*exactp = 0;
if (LF_ISSET(S_EXACT))
goto notfound;
/*
* !!!
* Possibly returning a deleted record -- DB_SET_RANGE,
* DB_KEYFIRST and DB_KEYLAST don't require an exact
* match, and we don't want to walk multiple pages here
* to find an undeleted record. This is handled in the
* __bam_c_search() routine.
*/
BT_STK_ENTER(t, h, base, lock, ret);
return (ret);
}
/*
* If it's not a leaf page, record the internal page (which is
* a parent page for the key). Decrement the base by 1 if it's
* non-zero so that if a split later occurs, the inserted page
* will be to the right of the saved page.
*/
indx = base > 0 ? base - O_INDX : base;
/*
* If we're trying to calculate the record number, sum up
* all the record numbers on this page up to the indx point.
*/
if (recnop != NULL)
for (i = 0; i < indx; ++i)
recno += GET_BINTERNAL(h, i)->nrecs;
next: pg = GET_BINTERNAL(h, indx)->pgno;
if (stack) {
/* Return if this is the lowest page wanted. */
if (LF_ISSET(S_PARENT) && stop == h->level) {
BT_STK_ENTER(t, h, indx, lock, ret);
return (ret);
}
BT_STK_PUSH(t, h, indx, lock, ret);
if (ret != 0)
goto err;
if ((ret =
__bam_lget(dbp, 0, pg, DB_LOCK_WRITE, &lock)) != 0)
goto err;
} else {
(void)memp_fput(dbp->mpf, h, 0);
/*
* Decide if we want to return a pointer to the next
* page in the stack. If we do, write lock it and
* never unlock it.
*/
if ((LF_ISSET(S_PARENT) &&
(u_int8_t)(stop + 1) >= (u_int8_t)(h->level - 1)) ||
(h->level - 1) == LEAFLEVEL)
stack = 1;
if ((ret =
__bam_lget(dbp, 1, pg, stack && LF_ISSET(S_WRITE) ?
DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
goto err;
}
if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0)
goto err;
}
/* NOTREACHED */
match: *exactp = 1;
/*
* If we're trying to calculate the record number, add in the
* offset on this page and correct for the fact that records
* in the tree are 0-based.
*/
if (recnop != NULL)
*recnop = recno + (indx / P_INDX) + 1;
/*
* If we got here, we know that we have a btree leaf page.
*
* If there are duplicates, go to the first/last one. This is
* safe because we know that we're not going to leave the page,
* all duplicate sets that are not on overflow pages exist on a
* single leaf page.
*/
if (LF_ISSET(S_DUPLAST))
while (indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
h->inp[indx] == h->inp[indx + P_INDX])
indx += P_INDX;
else
while (indx > 0 &&
h->inp[indx] == h->inp[indx - P_INDX])
indx -= P_INDX;
/*
* Now check if we are allowed to return deleted items; if not
* find the next (or previous) non-deleted item.
*/
if (LF_ISSET(S_DELNO)) {
if (LF_ISSET(S_DUPLAST))
while (B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type) &&
indx > 0 &&
h->inp[indx] == h->inp[indx - P_INDX])
indx -= P_INDX;
else
while (B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type) &&
indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
h->inp[indx] == h->inp[indx + P_INDX])
indx += P_INDX;
if (B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type))
goto notfound;
}
BT_STK_ENTER(t, h, indx, lock, ret);
return (ret);
notfound:
(void)memp_fput(dbp->mpf, h, 0);
(void)__BT_LPUT(dbp, lock);
ret = DB_NOTFOUND;
err: if (t->bt_csp > t->bt_sp) {
BT_STK_POP(t);
__bam_stkrel(dbp);
}
return (ret);
}
/*
* __bam_stkrel --
* Release all pages currently held in the stack.
*
* PUBLIC: int __bam_stkrel __P((DB *));
*/
int
__bam_stkrel(dbp)
DB *dbp;
{
BTREE *t;
EPG *epg;
t = dbp->internal;
for (epg = t->bt_sp; epg <= t->bt_csp; ++epg) {
(void)memp_fput(dbp->mpf, epg->page, 0);
(void)__BT_TLPUT(dbp, epg->lock);
}
return (0);
}
/*
* __bam_stkgrow --
* Grow the stack.
*
* PUBLIC: int __bam_stkgrow __P((BTREE *));
*/
int
__bam_stkgrow(t)
BTREE *t;
{
EPG *p;
size_t entries;
entries = t->bt_esp - t->bt_sp;
if ((p = (EPG *)__db_calloc(entries * 2, sizeof(EPG))) == NULL)
return (ENOMEM);
memcpy(p, t->bt_sp, entries * sizeof(EPG));
if (t->bt_sp != t->bt_stack)
FREE(t->bt_sp, entries * sizeof(EPG));
t->bt_sp = p;
t->bt_csp = p + entries;
t->bt_esp = p + entries * 2;
return (0);
}