Details
Description
How to reproduce:
1) Attach a second node to a Couchbase cluster, start the rebalance process, and during that time call the lcb_get function many times. As a result, the application crashes (the following assertion evaluates to 0):
assert(ringbuffer_read(&src->pending_cookies, &ct, sizeof(ct)) == sizeof(ct));
In that case src->pending_cookies.nbytes == 0, so ringbuffer_read() returns 0, the assertion fails,
and the application aborts.
We think that nbytes == 0 is a common situation when src->connected == 0, and that this situation must not be interpreted as an error.
Below you can find additional information
Stack:
#0 0xc000000000211ab0:0 in _lwp_kill+0x30 () from /lib/hpux64/libpthread.so.1
#1 0xc000000000178810:0 in pthread_kill ()
at /ux/core/libs/threadslibs/src/common/pthreads/signal.c:207
#2 0xc0000000003f8140:0 in raise ()
at ../../../../../core/libs/libc/shared_em_64_perf/../core/gen/raise.c:33
#3 0xc000000000508c50:0 in abort ()
at ../../../../../core/libs/libc/shared_em_64_perf/../core/gen/abort_em.c:84
#4 0xc0000000003c3f90:0 in _assert ()
at ../../../../../core/libs/libc/shared_em_64_perf/../core/gen/assert.c:57
#5 0xc0000000076a9080:0 in relocate_packets (src=0x6000000001c63c00,
dst_instance=0x60000000064f6800) at src/instance.c:542
#6 0xc0000000076a9b40:0 in lcb_update_serverlist (instance=0x60000000064f6800)
at src/instance.c:606
#7 0xc0000000076ac7e0:0 in vbucket_stream_handler (sock=31, which=2,
arg=0x60000000064f6800) at src/instance.c:996
#8 0xc000000007d6db60:0 in event_base_loop (base=0x6000000006381780,
flags=<not available>) at event.c:1346
#9 0xc000000000a3aed0:0 in lcb_io_run_event_loop (iops=0x60000000061bff20)
at plugins/io/libevent/plugin-libevent.c:324
#10 0xc0000000076c2750:0 in lcb_wait (instance=0x60000000064f6800)
at src/wait.c:60
#11 0xc0000000109c5e00:0 in Cache_Storage::CCouchbaseStorage::do_usefull_work (
---Type <return> to continue, or q <return> to quit---
this=0x6000000006224280) at cache_source/ucouchbasestorage.cpp:524
#12 0xc0000000109c7850:0 in Cache_Storage::CCouchbaseStorage::execute (
this=0x6000000006224280) at cache_source/ucouchbasestorage.cpp:585
#13 0xc00000000782c0f0:0 in threads::thread_proc (thr_ptr=0x60000000062242a0)
at has_common_source/source/cpp/threads.cpp:225
#14 0xc00000000013fb20:0 in __pthread_bound_body ()
at /ux/core/libs/threadslibs/src/common/pthreads/pthread.c:4875
(gdb) list
537 }
538 dst = dst_instance->servers + (lcb_size_t)idx;
539 if (src->connected) {
540 assert(ringbuffer_read(&src->output_cookies, &ct, sizeof(ct)) == sizeof(ct));
541 } else {
542 assert(ringbuffer_read(&src->pending_cookies, &ct, sizeof(ct)) == sizeof(ct));
543 }
544
545 assert(ringbuffer_ensure_capacity(&dst->cmd_log, npacket));
546 assert(ringbuffer_write(&dst->cmd_log, cmd.bytes, sizeof(cmd.bytes)) == sizeof(cmd.bytes));
(gdb) p src->pending_cookies
$5 = {root = 0x6000000001e3d200 "", read_head = 0x6000000001e3d200 "",
write_head = 0x6000000001e3d200 "", size = 512, nbytes = 0}
(gdb) p src->connected
$7 = 0
lcb_size_t ringbuffer_read(ringbuffer_t *buffer, void *dest, lcb_size_t nb)
{
char *d = dest;
lcb_size_t nr = 0;
lcb_size_t space;
lcb_size_t toRead;
if (buffer->nbytes == 0) {
return 0;
}
1) Attach a second node to a Couchbase cluster, start the rebalance process, and during that time call the lcb_get function many times. As a result, the application crashes (the following assertion evaluates to 0):
assert(ringbuffer_read(&src->pending_cookies, &ct, sizeof(ct)) == sizeof(ct));
In that case src->pending_cookies.nbytes == 0, so ringbuffer_read() returns 0, the assertion fails,
and the application aborts.
We think that nbytes == 0 is a common situation when src->connected == 0, and that this situation must not be interpreted as an error.
Below you can find additional information
Stack:
#0 0xc000000000211ab0:0 in _lwp_kill+0x30 () from /lib/hpux64/libpthread.so.1
#1 0xc000000000178810:0 in pthread_kill ()
at /ux/core/libs/threadslibs/src/common/pthreads/signal.c:207
#2 0xc0000000003f8140:0 in raise ()
at ../../../../../core/libs/libc/shared_em_64_perf/../core/gen/raise.c:33
#3 0xc000000000508c50:0 in abort ()
at ../../../../../core/libs/libc/shared_em_64_perf/../core/gen/abort_em.c:84
#4 0xc0000000003c3f90:0 in _assert ()
at ../../../../../core/libs/libc/shared_em_64_perf/../core/gen/assert.c:57
#5 0xc0000000076a9080:0 in relocate_packets (src=0x6000000001c63c00,
dst_instance=0x60000000064f6800) at src/instance.c:542
#6 0xc0000000076a9b40:0 in lcb_update_serverlist (instance=0x60000000064f6800)
at src/instance.c:606
#7 0xc0000000076ac7e0:0 in vbucket_stream_handler (sock=31, which=2,
arg=0x60000000064f6800) at src/instance.c:996
#8 0xc000000007d6db60:0 in event_base_loop (base=0x6000000006381780,
flags=<not available>) at event.c:1346
#9 0xc000000000a3aed0:0 in lcb_io_run_event_loop (iops=0x60000000061bff20)
at plugins/io/libevent/plugin-libevent.c:324
#10 0xc0000000076c2750:0 in lcb_wait (instance=0x60000000064f6800)
at src/wait.c:60
#11 0xc0000000109c5e00:0 in Cache_Storage::CCouchbaseStorage::do_usefull_work (
---Type <return> to continue, or q <return> to quit---
this=0x6000000006224280) at cache_source/ucouchbasestorage.cpp:524
#12 0xc0000000109c7850:0 in Cache_Storage::CCouchbaseStorage::execute (
this=0x6000000006224280) at cache_source/ucouchbasestorage.cpp:585
#13 0xc00000000782c0f0:0 in threads::thread_proc (thr_ptr=0x60000000062242a0)
at has_common_source/source/cpp/threads.cpp:225
#14 0xc00000000013fb20:0 in __pthread_bound_body ()
at /ux/core/libs/threadslibs/src/common/pthreads/pthread.c:4875
(gdb) list
537 }
538 dst = dst_instance->servers + (lcb_size_t)idx;
539 if (src->connected) {
540 assert(ringbuffer_read(&src->output_cookies, &ct, sizeof(ct)) == sizeof(ct));
541 } else {
542 assert(ringbuffer_read(&src->pending_cookies, &ct, sizeof(ct)) == sizeof(ct));
543 }
544
545 assert(ringbuffer_ensure_capacity(&dst->cmd_log, npacket));
546 assert(ringbuffer_write(&dst->cmd_log, cmd.bytes, sizeof(cmd.bytes)) == sizeof(cmd.bytes));
(gdb) p src->pending_cookies
$5 = {root = 0x6000000001e3d200 "", read_head = 0x6000000001e3d200 "",
write_head = 0x6000000001e3d200 "", size = 512, nbytes = 0}
(gdb) p src->connected
$7 = 0
lcb_size_t ringbuffer_read(ringbuffer_t *buffer, void *dest, lcb_size_t nb)
{
char *d = dest;
lcb_size_t nr = 0;
lcb_size_t space;
lcb_size_t toRead;
if (buffer->nbytes == 0) {
return 0;
}
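First, when a disconnected (!connected) server is reconnected, the tasks in its "pending" buffer are moved into the "output" buffer. If its connection is broken again immediately, relocate_packets() takes the wrong path: because src->connected is 0 again, it reads from the now-empty pending_cookies buffer instead of output_cookies.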
Second, there seems to be another bug at handler.c:751 that could cause a crash during failover (a missing NULL pointer check).
============================================================================================================
diff -rc libcouchbase-2.0.4/src/handler.c libcouchbase-2.0.4.patched/src/handler.c
*** libcouchbase-2.0.4/src/handler.c 2013-03-04 15:07:51.785943104 +0800
--- libcouchbase-2.0.4.patched/src/handler.c 2013-03-27 20:48:13.042296326 +0800
***************
*** 748,754 ****
{
lcb_uint16_t ret = ntohs(res->response.status);
if (ret == PROTOCOL_BINARY_RESPONSE_SUCCESS) {
! sasl_dispose(&server->sasl_conn);
server->sasl_conn = NULL;
lcb_server_connected(server);
} else if (ret == PROTOCOL_BINARY_RESPONSE_AUTH_CONTINUE) {
--- 748,756 ----
{
lcb_uint16_t ret = ntohs(res->response.status);
if (ret == PROTOCOL_BINARY_RESPONSE_SUCCESS) {
! if (server->sasl_conn) {
! sasl_dispose(&server->sasl_conn);
! }
server->sasl_conn = NULL;
lcb_server_connected(server);
} else if (ret == PROTOCOL_BINARY_RESPONSE_AUTH_CONTINUE) {
diff -rc libcouchbase-2.0.4/src/instance.c libcouchbase-2.0.4.patched/src/instance.c
*** libcouchbase-2.0.4/src/instance.c 2013-03-06 05:41:29.600470755 +0800
--- libcouchbase-2.0.4.patched/src/instance.c 2013-03-27 20:49:06.766296416 +0800
***************
*** 628,638 ****
idx = vbucket_found_incorrect_master(dst_instance->vbucket_config, vb, idx);
}
dst = dst_instance->servers + (lcb_size_t)idx;
! if (src->connected) {
! assert(ringbuffer_read(&src->output_cookies, &ct, sizeof(ct)) == sizeof(ct));
! } else {
! assert(ringbuffer_read(&src->pending_cookies, &ct, sizeof(ct)) == sizeof(ct));
! }
assert(ringbuffer_ensure_capacity(&dst->cmd_log, npacket));
assert(ringbuffer_write(&dst->cmd_log, cmd.bytes, sizeof(cmd.bytes)) == sizeof(cmd.bytes));
--- 628,635 ----
idx = vbucket_found_incorrect_master(dst_instance->vbucket_config, vb, idx);
}
dst = dst_instance->servers + (lcb_size_t)idx;
! assert(ringbuffer_read(&src->output_cookies, &ct, sizeof(ct)) == sizeof(ct) ||
! ringbuffer_read(&src->pending_cookies, &ct, sizeof(ct)) == sizeof(ct));
assert(ringbuffer_ensure_capacity(&dst->cmd_log, npacket));
assert(ringbuffer_write(&dst->cmd_log, cmd.bytes, sizeof(cmd.bytes)) == sizeof(cmd.bytes));
============================================================================================================