* Fix another bug in Berkeley DB's DB_REGISTER feature (Oracle should
  hire me...).  There is a very small race between processes exiting
  the environment and processes entering, such that the entering
  process would incorrectly conclude that the exiting process had
  died and recovery was necessary.  However, this race was triggering
  crashes on a daily basis in our build farm.

svn path=/nixpkgs/trunk/; revision=8712
This commit is contained in:
Eelco Dolstra 2007-05-16 19:30:57 +00:00
parent d3b29d16a0
commit 422da176d7
2 changed files with 48 additions and 1 deletions

View File

@@ -11,5 +11,5 @@ stdenv.mkDerivation {
(if cxxSupport then "--enable-cxx" else "--disable-cxx")
(if compat185 then "--enable-compat185" else "--disable-compat185")
];
- patches = [./cygwin-4.5.patch];
+ patches = [./cygwin-4.5.patch ./register-race-fix.patch];
}

View File

@@ -0,0 +1,47 @@
diff -rc db-4.5.20-orig/env/env_register.c db-4.5.20/env/env_register.c
*** db-4.5.20-orig/env/env_register.c 2006-09-09 16:29:04.000000000 +0200
--- db-4.5.20/env/env_register.c 2007-05-16 21:13:27.000000000 +0200
***************
*** 255,260 ****
--- 255,262 ----
buf[nr - 1] = '\0';
}
+ //sleep(3);
+
pos = (off_t)lcnt * PID_LEN;
if (REGISTRY_LOCK(dbenv, pos, 1) == 0) {
if ((ret = REGISTRY_UNLOCK(dbenv, pos)) != 0)
***************
*** 361,366 ****
--- 363,392 ----
if (recovery_failed)
goto err;
+ //sleep(5);
+
+ /*
+ * Acquire an exclusive lock to prevent a race like this:
+ *
+ * 1) Process X is about to exit and process Y is just
+ * starting.
+ * 2) Process Y reads X's slot.
+ * 3) Process X clears its slot.
+ * 4) Process Y sees that X's slot isn't cleared yet (since it
+ * just read the old value).
+ * 5) Process X closes the registry, releases the lock on its
+ * slot.
+ * 6) Process Y tries to acquire X's slot and succeeds, so it
+ * concludes that X died and recovery is needed.
+ *
+ * A more efficient solution to this problem would be to let
+ * __envreg_add acquire the lock on a slot first, and *then*
+ * read the slot (instead of the other way around). Then we
+ * wouldn't need the exclusive lock here.
+ */
+ if ((ret = REGISTRY_EXCL_LOCK(dbenv, 0)) != 0)
+ goto err;
+
/*
* Why isn't an exclusive lock necessary to discard a DB_ENV handle?
*