<!-- 
RSS generated by JIRA (5.2.4#845-sha1:c9f4cc41abe72fb236945343a1f485c2c844dac9) at Tue Jun 18 01:06:41 CDT 2013

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary add field=key&field=summary to the URL of your request.
For example:
http://www.couchbase.com/issues/si/jira.issueviews:issue-xml/MB-6592/MB-6592.xml?field=key&field=summary
-->
<rss version="0.92" >
<channel>
    <title>Couchbase</title>
    <link>http://www.couchbase.com/issues</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>5.2.4</version>
        <build-number>845</build-number>
        <build-date>26-12-2012</build-date>
    </build-info>

<item>
            <title>[MB-6592] [longevity] memcached hangs when aborting during swap rebalance operation and fails to restart ( exit 71 )</title>
                <link>http://www.couchbase.com/issues/browse/MB-6592</link>
                <project id="10010" key="MB">Couchbase Server</project>
                        <description>Cluster information:&lt;br/&gt;
- 11 centos 6.2 64bit server with 4 cores CPU&lt;br/&gt;
- Each server has 10 GB RAM and 150 GB disk.&lt;br/&gt;
- 8 GB RAM for couchbase server at each node (80% of total system memory)&lt;br/&gt;
- Disk format ext3 on both data and root&lt;br/&gt;
- Each server has its own drive, no disk sharing with other server.&lt;br/&gt;
- Load 9 million items to both buckets&lt;br/&gt;
- Initial indexing, so cpu a little heavy load&lt;br/&gt;
- Cluster has 2 buckets, default (3GB) and saslbucket (3GB)&lt;br/&gt;
- Each bucket has one doc and 2 views for each doc (default d1 and saslbucket d11)&lt;br/&gt;
&lt;br/&gt;
* Create cluster with 10 nodes installed couchbase server 2.0.0-1697&lt;br/&gt;
&lt;br/&gt;
10.3.121.13&lt;br/&gt;
10.3.121.14&lt;br/&gt;
10.3.121.15&lt;br/&gt;
10.3.121.16&lt;br/&gt;
10.3.121.17&lt;br/&gt;
10.3.121.20&lt;br/&gt;
10.3.121.22&lt;br/&gt;
10.3.121.24&lt;br/&gt;
10.3.121.25&lt;br/&gt;
10.3.121.23&lt;br/&gt;
* Data path /data&lt;br/&gt;
* View path /data &lt;br/&gt;
&lt;br/&gt;
* Do swap rebalance.  Add node 26 and remove node 25&lt;br/&gt;
* Rebalance failed, and the log page showed many &quot;memcached exited with status 71&quot; error messages.&lt;br/&gt;
&lt;br/&gt;
Link to diags of all nodes  &lt;a href=&quot;https://s3.amazonaws.com/packages.couchbase/diag-logs/orange/201209/11nodes-1697-memcached-exit-71-20120910.tgz&quot;&gt;https://s3.amazonaws.com/packages.couchbase/diag-logs/orange/201209/11nodes-1697-memcached-exit-71-20120910.tgz&lt;/a&gt;&lt;br/&gt;
&lt;br/&gt;
Link to atop node 13  &lt;a href=&quot;https://s3.amazonaws.com/packages.couchbase/atop-files/orange/201209/atop-node13&quot;&gt;https://s3.amazonaws.com/packages.couchbase/atop-files/orange/201209/atop-node13&lt;/a&gt;&lt;br/&gt;
Due to large size of atop file, all other atop files are in /tmp directory of each node&lt;br/&gt;
</description>
                <environment>centos 6.2 64 bit build 2.0.0-1697</environment>
            <key id="19667">MB-6592</key>
            <summary>[longevity] memcached hangs when aborting during swap rebalance operation and fails to restart ( exit 71 )</summary>
                <type id="1" iconUrl="http://www.couchbase.com/issues/images/icons/issuetypes/bug.png">Bug</type>
                                <priority id="1" iconUrl="http://www.couchbase.com/issues/images/icons/priorities/blocker.png">Blocker</priority>
                    <status id="6" iconUrl="http://www.couchbase.com/issues/images/icons/statuses/closed.png">Closed</status>
                    <resolution id="5">Cannot Reproduce</resolution>
                    <security id="10011">Public</security>
                        <assignee username="thuan">Thuan Nguyen</assignee>
                                <reporter username="thuan">Thuan Nguyen</reporter>
                        <labels>
                        <label>2.0-beta-release-notes</label>
                        <label>system-test</label>
                    </labels>
                <created>Mon, 10 Sep 2012 16:17:46 -0500</created>
                <updated>Thu, 10 Jan 2013 01:21:38 -0600</updated>
                    <resolved>Wed, 26 Sep 2012 20:34:12 -0500</resolved>
                            <version>2.0-beta</version>
                                <fixVersion>2.0-beta-2</fixVersion>
                                <component>couchbase-bucket</component>
                                <votes>0</votes>
                        <watches>0</watches>
                                                    <comments>
                    <comment id="38360" author="karan" created="Mon, 10 Sep 2012 16:36:12 -0500"  >From the memcached logs&lt;br/&gt;
&lt;br/&gt;
&lt;br/&gt;
&lt;br/&gt;
Sat Sep  8 17:56:06.159645 3: Connection closed by mccouch&lt;br/&gt;
Sat Sep  8 17:56:06.159864 3: Trying to connect to mccouch: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.160432 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.160462 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.165925 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.165961 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.171397 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.171430 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.176813 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.176839 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.182241 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.182267 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.187653 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.187679 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.193033 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.193058 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.198462 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.198518 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.204013 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.204038 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.209451 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.209477 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.214788 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.214813 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.220685 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.220712 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.226387 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.226415 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.231879 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.231909 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.237313 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.237339 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.242731 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.242757 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.248138 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.248164 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.253603 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.253630 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
Sat Sep  8 17:56:06.261017 3: Failed to connect to: &amp;quot;localhost:11213&amp;quot;&lt;br/&gt;
</comment>
                    <comment id="38404" author="karan" created="Mon, 10 Sep 2012 20:09:06 -0500"  >GDB output of the memcached process:-&lt;br/&gt;
&lt;a href=&quot;https://friendpaste.com/5gFQw9wPBFOgNjue64HfM2&quot;&gt;https://friendpaste.com/5gFQw9wPBFOgNjue64HfM2&lt;/a&gt; &lt;br/&gt;
&lt;br/&gt;
&lt;br/&gt;
Not sure if related to &lt;br/&gt;
&lt;a href=&quot;http://www.couchbase.com/issues/browse/MB-5653&quot;&gt;http://www.couchbase.com/issues/browse/MB-5653&lt;/a&gt;&lt;br/&gt;
</comment>
                    <comment id="38409" author="karan" created="Mon, 10 Sep 2012 20:15:43 -0500"  >We have the core file of the memcached process available if needed. </comment>
                    <comment id="38411" author="karan" created="Mon, 10 Sep 2012 20:16:33 -0500"  >10.3.121.13 (root/couchbase)&lt;br/&gt;
&lt;br/&gt;
/root/karan</comment>
                    <comment id="38413" author="farshid" created="Mon, 10 Sep 2012 20:46:16 -0500"  >Karan,&lt;br/&gt;
&lt;br/&gt;
shouldn&amp;#39;t this be assigned to couchbase bucket team first ?</comment>
                    <comment id="38432" author="karan" created="Mon, 10 Sep 2012 23:13:08 -0500"  >Assigning to Chiyoung to see if this is ep-engine related</comment>
                    <comment id="38433" author="chiyoung" created="Mon, 10 Sep 2012 23:26:04 -0500"  >Mike, please take a look at this issue.</comment>
                    <comment id="38441" author="trond" created="Tue, 11 Sep 2012 01:22:26 -0500"  >Just guessing here, but from the callstack it looks like the crash is in the assign() method for the basic_string in flushOneDelOrSet (the other stacks seem somewhat sane).</comment>
                    <comment id="38480" author="alkondratenko" created="Tue, 11 Sep 2012 11:22:22 -0500"  >Given we&amp;#39;re spinning, looks similar to a case I hit some time ago when some list in flusher was &amp;#39;infinite&amp;#39;. I even showed this to Chiyoung but we were unable to make any sense of what we saw</comment>
                    <comment id="38521" author="mikew" created="Tue, 11 Sep 2012 15:56:06 -0500"  >Trond,&lt;br/&gt;
&lt;br/&gt;
I think this might be in the logging thread. I found this in the backtrace with gdb.&lt;br/&gt;
&lt;br/&gt;
Core was generated by `/opt/couchbase/bin/memcached -X /opt/couchbase/lib/memcached/stdin_term_handler.&amp;#39;.&lt;br/&gt;
#0  0x0000003d7160b7bb in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0&lt;br/&gt;
&lt;br/&gt;
There are a few ep-engine threads at this location also, but since the logging stuff is relatively new I think it might be the cause. Please take a look at this and assign it back to me if you think the problem is in ep-engine.</comment>
                    <comment id="38565" author="trond" created="Wed, 12 Sep 2012 03:28:33 -0500"  >I find it relatively hard to believe that it may dump core on that line given that the code for that looks like:&lt;br/&gt;
&lt;br/&gt;
At file scope we have:&lt;br/&gt;
&lt;br/&gt;
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;&lt;br/&gt;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;&lt;br/&gt;
&lt;br/&gt;
The call we&amp;#39;re currently stuck in looks like:&lt;br/&gt;
&lt;br/&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;pthread_mutex_lock(&amp;amp;mutex);&lt;br/&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;while (run) {&lt;br/&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;struct timeval tp;&lt;br/&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;gettimeofday(&amp;amp;tp, NULL);&lt;br/&gt;
&lt;br/&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;[ ... cut ...]&lt;br/&gt;
&lt;br/&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;gettimeofday(&amp;amp;tp, NULL);&lt;br/&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;next = tp.tv_sec + (unsigned int)sleeptime;&lt;br/&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;struct timespec ts = { .tv_sec = next };&lt;br/&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;pthread_cond_timedwait(&amp;amp;cond, &amp;amp;mutex, &amp;amp;ts);    &amp;lt;- This is where we&amp;#39;re stuck&lt;br/&gt;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;}&lt;br/&gt;
&lt;br/&gt;
I can&amp;#39;t see how we can pass stuff to pthread_cond_timedwait here that may cause it to _crash_ (it _could_ return with EINVAL for invalid input arguments)...&lt;br/&gt;
&lt;br/&gt;
If only I figured out how to ask gdb to show me the thread that caused the crash (and why.. which signal etc)&lt;br/&gt;
&lt;br/&gt;
&lt;br/&gt;
</comment>
                    <comment id="38623" author="chiyoung" created="Wed, 12 Sep 2012 13:26:57 -0500"  >Let me take a look at this issue to see if there is anything suspicious in ep-engine.</comment>
                    <comment id="39838" author="chiyoung" created="Wed, 26 Sep 2012 19:23:02 -0500"  >Tony,&lt;br/&gt;
&lt;br/&gt;
I was not able to reproduce this issue with a 4-node cluster and still don&amp;#39;t know why it happened.&lt;br/&gt;
&lt;br/&gt;
Did you see the same issue recently in your manual and longevity test?</comment>
                    <comment id="39849" author="thuan" created="Wed, 26 Sep 2012 20:19:01 -0500"  >I have not seen this issue since then in my system test.</comment>
                    <comment id="39850" author="chiyoung" created="Wed, 26 Sep 2012 20:34:12 -0500"  >Let&amp;#39;s close this bug at this time, and create a new bug if we see this issue again. There have been lots of fixes including bucket destroy in ep-engine.</comment>
                </comments>
                    <attachments>
                    <attachment id="14919" name="memcached_logfile" size="1095025" author="karan" created="Mon, 10 Sep 2012 16:35:31 -0500" />
                </attachments>
            <subtasks>
        </subtasks>
                <customfields>
                                                                        <customfield id="customfield_10180" key="com.atlassian.jira.ext.charting:firstresponsedate">
                <customfieldname>Date of First Response</customfieldname>
                <customfieldvalues>
                    <customfieldvalue>Mon, 10 Sep 2012 16:36:12 -0500</customfieldvalue>

                </customfieldvalues>
            </customfield>
                                                                                                                                                                                                            <customfield id="customfield_10081" key="com.pyxis.greenhopper.jira:gh-global-rank">
                <customfieldname>Rank</customfieldname>
                <customfieldvalues>
                    <customfieldvalue>4045</customfieldvalue>
                </customfieldvalues>
            </customfield>
                                                                                                                                                                                        <customfield id="customfield_10181" key="com.atlassian.jira.ext.charting:timeinstatus">
                <customfieldname>Time In Status</customfieldname>
                <customfieldvalues>
                    
                </customfieldvalues>
            </customfield>
                                                                    </customfields>
    </item>
</channel>
</rss>