<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:10:27 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-793] Reconnections should not be refused when there is a request in progress from this client.</title>
                <link>https://jira.whamcloud.com/browse/LU-793</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;While originally this was a useful workaround, it created a lot of other unintended problems.&lt;/p&gt;

&lt;p&gt;This code must be disabled and instead we just should disable handling several duplicate requests at the same time.&lt;/p&gt;</description>
                <environment></environment>
        <key id="12244">LU-793</key>
            <summary>Reconnections should not be refused when there is a request in progress from this client.</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="tappro">Mikhail Pershin</assignee>
                                    <reporter username="green">Oleg Drokin</reporter>
                        <labels>
                            <label>JL</label>
                    </labels>
                <created>Tue, 25 Oct 2011 11:41:16 +0000</created>
                <updated>Mon, 3 Jun 2019 17:52:49 +0000</updated>
                            <resolved>Tue, 18 Feb 2014 22:03:19 +0000</resolved>
                                    <version>Lustre 2.1.0</version>
                    <version>Lustre 2.2.0</version>
                    <version>Lustre 2.4.0</version>
                    <version>Lustre 1.8.6</version>
                                    <fixVersion>Lustre 2.6.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>24</watches>
                                                                            <comments>
                            <comment id="32308" author="spitzcor" created="Wed, 28 Mar 2012 12:35:54 +0000"  >&lt;p&gt;Vitaly shared that a patch is under review at &lt;a href=&quot;http://review.whamcloud.com/#change,1616&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,1616&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="33436" author="igolovach" created="Wed, 4 Apr 2012 06:08:42 +0000"  >&lt;p&gt;Hi Oleg,&lt;/p&gt;

&lt;p&gt;since we are interested in this patch could you provide us with the next information:&lt;br/&gt;
 is  &lt;a href=&quot;http://review.whamcloud.com/#change,1616&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,1616&lt;/a&gt; a final version or you have a plan to add something there (if yes - what are your time estimates for such action)?&lt;/p&gt;

&lt;p&gt;thank you.&lt;/p&gt;</comment>
                            <comment id="33461" author="green" created="Wed, 4 Apr 2012 12:23:52 +0000"  >&lt;p&gt;Well, I cannot predict how well the inspections will pass.&lt;br/&gt;
I think it&apos;s fine, though a bit of real testing is needed to ensure clients will not end up bombarding the servers with repeated resent requests too often (for the requests that were stuck on the server).&lt;/p&gt;

&lt;p&gt;Also see my comment for patchset 3 on Mar 13th.&lt;/p&gt;</comment>
                            <comment id="34380" author="igolovach" created="Tue, 10 Apr 2012 08:43:12 +0000"  >&lt;p&gt;Hi Oleg.&lt;/p&gt;

&lt;p&gt;Is it possible to speed-up the review process somehow? Or we can do review in gerrit from our side since we are interested in this code?&lt;/p&gt;

&lt;p&gt;Thank you,&lt;br/&gt;
    Iurii Golovach&lt;/p&gt;</comment>
                            <comment id="35966" author="nrutman" created="Wed, 2 May 2012 12:37:23 +0000"  >&lt;p&gt;In the Description for this bug, does &quot;this was a useful workaround&quot; refer to the &lt;a href=&quot;http://review.whamcloud.com/#change,1616&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,1616&lt;/a&gt; patch?&lt;br/&gt;
Is this Oleg&apos;s patch originally?  If someone were to work on it, should they address Andreas&apos; comments in the patch or try to follow the 2nd line of the Description?&lt;/p&gt;</comment>
                            <comment id="35969" author="green" created="Wed, 2 May 2012 12:56:42 +0000"  >&lt;p&gt;The workaround being referred to is the refusal of reconnection.&lt;/p&gt;

&lt;p&gt;The patch in change 1616 is originally mine, and I still work on it as I have time.&lt;br/&gt;
But if you have someone with more time that can pick it up and finish it faster, that&apos;s cool too.&lt;/p&gt;

&lt;p&gt;The Andreas&apos; comments are definitely valid and the request switching I think is needed, I was liking this idea from the start, and now Andreas seems to like it too, so it needs to be done I guess.&lt;/p&gt;</comment>
                            <comment id="43203" author="spitzcor" created="Tue, 14 Aug 2012 12:33:02 +0000"  >&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/#change,1616&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,1616&lt;/a&gt; needs rebasing.&lt;/p&gt;</comment>
                            <comment id="46850" author="morrone" created="Mon, 22 Oct 2012 18:46:43 +0000"  >&lt;p&gt;Long standing bug, duplicate of &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7&quot; title=&quot;Reconnect server-&amp;gt;client connection&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7&quot;&gt;&lt;del&gt;LU-7&lt;/del&gt;&lt;/a&gt;, which in turn was a duplicate of years-old bugzilla bug.&lt;/p&gt;</comment>
                            <comment id="47437" author="iurii" created="Tue, 6 Nov 2012 09:39:13 +0000"  >&lt;p&gt;This scheme shows cases when we drop new packet and when we process it.&lt;/p&gt;</comment>
                            <comment id="49504" author="jlevi" created="Thu, 20 Dec 2012 14:57:33 +0000"  >&lt;p&gt;Mike, &lt;br/&gt;
Could you please have a look at this one?&lt;br/&gt;
Thank you!&lt;/p&gt;</comment>
                            <comment id="50042" author="tappro" created="Sun, 6 Jan 2013 09:49:18 +0000"  >&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/#change,4960&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,4960&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;I&apos;ve just updated old patch to be compatible with new master code. It provides the same functionality as old one so far, without any attempts to use new reply buffers while answering on older request.&lt;/p&gt;</comment>
                            <comment id="50491" author="cliffw" created="Tue, 15 Jan 2013 13:41:00 +0000"  >&lt;p&gt;I am still experiencing this issue on Hyperion, especially using mdtest file-per-process. In the last test, starting seeing these timeouts prior to the actual error.&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
Jan 14 22:35:08 hyperion-rst6 kernel: LNet: Service thread pid 11870 was inactive &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; 200.00s. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; debugging purposes:
Jan 14 22:35:08 hyperion-rst6 kernel: Pid: 11870, comm: mdt03_001
Jan 14 22:35:08 hyperion-rst6 kernel:
Jan 14 22:35:08 hyperion-rst6 kernel: Call Trace:
Jan 14 22:35:08 hyperion-rst6 kernel: [&amp;lt;ffffffffa06cf072&amp;gt;] start_this_handle+0x282/0x500 [jbd2]
Jan 14 22:35:08 hyperion-rst6 kernel: [&amp;lt;ffffffff81092170&amp;gt;] ? autoremove_wake_function+0x0/0x40
Jan 14 22:35:08 hyperion-rst6 kernel: [&amp;lt;ffffffffa06cf4f0&amp;gt;] jbd2_journal_start+0xd0/0x110 [jbd2]
Jan 14 22:35:08 hyperion-rst6 kernel: [&amp;lt;ffffffffa072bab8&amp;gt;] ldiskfs_journal_start_sb+0x58/0x90 [ldiskfs]
Jan 14 22:35:08 hyperion-rst6 kernel: [&amp;lt;ffffffffa072be7c&amp;gt;] ldiskfs_dquot_initialize+0x4c/0xc0 [ldiskfs]
Jan 14 22:35:08 hyperion-rst6 kernel: [&amp;lt;ffffffffa0708590&amp;gt;] ? ldiskfs_delete_inode+0x0/0x250 [ldiskfs]
Jan 14 22:35:08 hyperion-rst6 kernel: [&amp;lt;ffffffff811968a3&amp;gt;] generic_delete_inode+0x173/0x1d0
Jan 14 22:35:08 hyperion-rst6 kernel: [&amp;lt;ffffffff81196965&amp;gt;] generic_drop_inode+0x65/0x80
Jan 14 22:35:08 hyperion-rst6 kernel: [&amp;lt;ffffffff811957b2&amp;gt;] iput+0x62/0x70
Jan 14 22:35:08 hyperion-rst6 kernel: [&amp;lt;ffffffffa0e3fc24&amp;gt;] osd_object_delete+0x1d4/0x2f0 [osd_ldiskfs]
Jan 14 22:35:08 hyperion-rst6 kernel: [&amp;lt;ffffffffa07e6059&amp;gt;] lu_object_free+0x89/0x1a0 [obdclass]
Jan 14 22:35:08 hyperion-rst6 kernel: [&amp;lt;ffffffffa03a5351&amp;gt;] ? libcfs_debug_msg+0x41/0x50 [libcfs]
Jan 14 22:35:08 hyperion-rst6 kernel: [&amp;lt;ffffffffa03aa5d2&amp;gt;] ? cfs_hash_bd_from_key+0x42/0xd0 [libcfs]
Jan 14 22:35:08 hyperion-rst6 kernel: [&amp;lt;ffffffffa07e690d&amp;gt;] lu_object_put+0xad/0x320 [obdclass] 
Jan 14 22:35:08 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f2c69d&amp;gt;] mdt_object_unlock_put+0x3d/0x110 [mdt]
Jan 14 22:35:08 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f4e0d7&amp;gt;] mdt_reint_unlink+0x637/0x850 [mdt]
Jan 14 22:35:08 hyperion-rst6 kernel: [&amp;lt;ffffffffa0965fce&amp;gt;] ? lustre_pack_reply_flags+0xae/0x1f0 [ptlrpc]
Jan 14 22:35:08 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f48cf1&amp;gt;] mdt_reint_rec+0x41/0xe0 [mdt]
Jan 14 22:35:08 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f423a3&amp;gt;] mdt_reint_internal+0x4e3/0x7d0 [mdt]
Jan 14 22:35:08 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f426d4&amp;gt;] mdt_reint+0x44/0xe0 [mdt]
Jan 14 22:35:08 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f31a72&amp;gt;] mdt_handle_common+0x8e2/0x1680 [mdt]
Jan 14 22:35:09 hyperion-rst6 kernel: [&amp;lt;ffffffffa0f67a25&amp;gt;] mds_regular_handle+0x15/0x20 [mdt]
Jan 14 22:35:09 hyperion-rst6 kernel: [&amp;lt;ffffffffa09761ec&amp;gt;] ptlrpc_server_handle_request+0x41c/0xdf0 [ptlrpc]
Jan 14 22:35:09 hyperion-rst6 kernel: [&amp;lt;ffffffffa039564e&amp;gt;] ? cfs_timer_arm+0xe/0x10 [libcfs]
Jan 14 22:35:09 hyperion-rst6 kernel: [&amp;lt;ffffffffa096d669&amp;gt;] ? ptlrpc_wait_event+0xa9/0x290 [ptlrpc]
Jan 14 22:35:09 hyperion-rst6 kernel: [&amp;lt;ffffffff81053463&amp;gt;] ? __wake_up+0x53/0x70
Jan 14 22:35:09 hyperion-rst6 kernel: [&amp;lt;ffffffffa0977776&amp;gt;] ptlrpc_main+0xbb6/0x1950 [ptlrpc]
Jan 14 22:35:09 hyperion-rst6 kernel: [&amp;lt;ffffffffa0976bc0&amp;gt;] ? ptlrpc_main+0x0/0x1950 [ptlrpc]
Jan 14 22:35:09 hyperion-rst6 kernel: [&amp;lt;ffffffff8100c14a&amp;gt;] child_rip+0xa/0x20
Jan 14 22:35:09 hyperion-rst6 kernel: [&amp;lt;ffffffffa0976bc0&amp;gt;] ? ptlrpc_main+0x0/0x1950 [ptlrpc]
Jan 14 22:35:09 hyperion-rst6 kernel: [&amp;lt;ffffffffa0976bc0&amp;gt;] ? ptlrpc_main+0x0/0x1950 [ptlrpc]
Jan 14 22:35:09 hyperion-rst6 kernel: [&amp;lt;ffffffff8100c140&amp;gt;] ? child_rip+0x0/0x20
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="50493" author="cliffw" created="Tue, 15 Jan 2013 13:52:34 +0000"  >&lt;p&gt;Here is a call trace from a client, while the MDS is reporting the RPC issue:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
2013-01-15 09:45:57 INFO: task mdtest:6078 blocked &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; more than 120 seconds.
2013-01-15 09:45:57 &lt;span class=&quot;code-quote&quot;&gt;&quot;echo 0 &amp;gt; /proc/sys/kernel/hung_task_timeout_secs&quot;&lt;/span&gt; disables &lt;span class=&quot;code-keyword&quot;&gt;this&lt;/span&gt; message.
2013-01-15 09:45:57 mdtest        D 0000000000000003     0  6078   6074 0x00000000
2013-01-15 09:45:57  ffff88017ede7c58 0000000000000086 0000000000000000 0000000000000086
2013-01-15 09:45:57  ffff88030aa62a20 ffff8801c5851960 ffffffff81aa5740 0000000000000286
2013-01-15 09:45:57  ffff8801bba69ab8 ffff88017ede7fd8 000000000000fb88 ffff8801bba69ab8
2013-01-15 09:45:57 Call Trace:
2013-01-15 09:45:57  [&amp;lt;ffffffff814ff6fe&amp;gt;] __mutex_lock_slowpath+0x13e/0x180
2013-01-15 09:45:57  [&amp;lt;ffffffff814ff59b&amp;gt;] mutex_lock+0x2b/0x50
2013-01-15 09:45:57  [&amp;lt;ffffffffa07eb80c&amp;gt;] mdc_reint+0x3c/0x3b0 [mdc]
2013-01-15 09:45:57  [&amp;lt;ffffffffa07ec880&amp;gt;] mdc_unlink+0x1b0/0x500 [mdc]
2013-01-15 09:45:57  [&amp;lt;ffffffffa0a5feb9&amp;gt;] lmv_unlink+0x199/0x7e0 [lmv]
2013-01-15 09:45:57  [&amp;lt;ffffffffa097dba6&amp;gt;] ll_unlink+0x176/0x670 [lustre]
2013-01-15 09:45:57  [&amp;lt;ffffffff8118923f&amp;gt;] vfs_unlink+0x9f/0xe0
2013-01-15 09:45:57  [&amp;lt;ffffffff81187f8a&amp;gt;] ? lookup_hash+0x3a/0x50
2013-01-15 09:45:57  [&amp;lt;ffffffff8118b773&amp;gt;] do_unlinkat+0x183/0x1c0
2013-01-15 09:45:57  [&amp;lt;ffffffff8119a960&amp;gt;] ? mntput_no_expire+0x30/0x110
2013-01-15 09:45:57  [&amp;lt;ffffffff8100c5b5&amp;gt;] ? math_state_restore+0x45/0x60
2013-01-15 09:45:57  [&amp;lt;ffffffff8118b7c6&amp;gt;] sys_unlink+0x16/0x20
2013-01-15 09:45:57  [&amp;lt;ffffffff8100b0f2&amp;gt;] system_call_fastpath+0x16/0x1b
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="50496" author="morrone" created="Tue, 15 Jan 2013 13:59:22 +0000"  >&lt;p&gt;Cliff, the refused-reconnection problem is the after-effect of whatever problem happened, and there are many ways it can be triggered.  You should open a separate bug report for the initial problem of the server going unresponsive, or whatever happened.&lt;/p&gt;</comment>
                            <comment id="50732" author="igolovach" created="Thu, 17 Jan 2013 17:42:54 +0000"  >&lt;p&gt;Hi gents, &lt;/p&gt;

&lt;p&gt;here is a patch version for this issue from our team:&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/#change,5054&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,5054&lt;/a&gt; &lt;/p&gt;</comment>
                            <comment id="50809" author="igolovach" created="Fri, 18 Jan 2013 11:05:33 +0000"  >&lt;p&gt;Since Andreas recommended to merge Mikhail patch and mine I updated &lt;a href=&quot;http://review.whamcloud.com/#change,4960&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,4960&lt;/a&gt; with improvement request.&lt;/p&gt;</comment>
                            <comment id="53473" author="spitzcor" created="Wed, 6 Mar 2013 15:17:42 +0000"  >&lt;p&gt;Change #4960 failed autotest.  Do you just need to resubmit?&lt;/p&gt;</comment>
                            <comment id="53663" author="tappro" created="Sun, 10 Mar 2013 23:16:08 +0000"  >&lt;p&gt;retriggered&lt;/p&gt;</comment>
                            <comment id="65523" author="tappro" created="Mon, 2 Sep 2013 05:39:57 +0000"  >&lt;p&gt;This patch doesn&apos;t work properly with bulk resends because bulks use new XID always, even for RESENT case. Therefore we cannot match original request that might be processed on server at the same time. It is not clear how to solve this in simple way in context of this patch, looks like patch should be reworked and will be more complex, e.g. we might need to change protocol and store original XID in bulk along with new one.&lt;br/&gt;
Btw, the comment about bulk XID and related code:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; ptlrpc_register_bulk(struct ptlrpc_request *req)
{
...

	/* An XID is only used &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; a single request from the client.
	 * For retried bulk transfers, a &lt;span class=&quot;code-keyword&quot;&gt;new&lt;/span&gt; XID will be allocated in
	 * in ptlrpc_check_set() &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; it needs to be resent, so it is not
	 * using the same RDMA match bits after an error.
	 */
...
}

and

void ptlrpc_resend_req(struct ptlrpc_request *req)
{
        DEBUG_REQ(D_HA, req, &lt;span class=&quot;code-quote&quot;&gt;&quot;going to resend&quot;&lt;/span&gt;);
        lustre_msg_set_handle(req-&amp;gt;rq_reqmsg, &amp;amp;(struct lustre_handle){ 0 });
        req-&amp;gt;rq_status = -EAGAIN;

	spin_lock(&amp;amp;req-&amp;gt;rq_lock);
        req-&amp;gt;rq_resend = 1;
        req-&amp;gt;rq_net_err = 0;
        req-&amp;gt;rq_timedout = 0;
        &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (req-&amp;gt;rq_bulk) {
                __u64 old_xid = req-&amp;gt;rq_xid;

                &lt;span class=&quot;code-comment&quot;&gt;/* ensure previous bulk fails */&lt;/span&gt;
                req-&amp;gt;rq_xid = ptlrpc_next_xid();
                CDEBUG(D_HA, &lt;span class=&quot;code-quote&quot;&gt;&quot;resend bulk old x&quot;&lt;/span&gt;LPU64&lt;span class=&quot;code-quote&quot;&gt;&quot; &lt;span class=&quot;code-keyword&quot;&gt;new&lt;/span&gt; x&quot;&lt;/span&gt;LPU64&lt;span class=&quot;code-quote&quot;&gt;&quot;\n&quot;&lt;/span&gt;,
                       old_xid, req-&amp;gt;rq_xid);
        }
        ptlrpc_client_wake_req(req);
	spin_unlock(&amp;amp;req-&amp;gt;rq_lock);
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="66314" author="tappro" created="Wed, 11 Sep 2013 06:12:24 +0000"  >&lt;p&gt;patch was refreshed &lt;a href=&quot;http://review.whamcloud.com/#/c/4960/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/4960/&lt;/a&gt;. It doesn&apos;t handle bulk requests for now, this will be solved in the following patch.&lt;/p&gt;</comment>
                            <comment id="68184" author="pjones" created="Wed, 2 Oct 2013 18:38:44 +0000"  >&lt;p&gt;This is still a support priority, but we need to finalize the fix before we consider it for inclusion in a release&lt;/p&gt;</comment>
                            <comment id="69915" author="tappro" created="Fri, 25 Oct 2013 15:25:22 +0000"  >&lt;p&gt;Patch is refreshed, now it handles all request including bulk. That requires protocol changes and works only with new clients, old clients will be handled as before - returning -EBUSY on connect request if there is another request in processing &lt;/p&gt;</comment>
                            <comment id="72401" author="tappro" created="Wed, 27 Nov 2013 16:01:51 +0000"  >&lt;p&gt;Patch was updated again and I hope it addresses all cases including bulk requests. It doesn&apos;t change protocol now. Cris, I expect this patch will be landed soon to the master, can you try it and see how it helps? &lt;/p&gt;</comment>
                            <comment id="72414" author="morrone" created="Wed, 27 Nov 2013 18:21:00 +0000"  >&lt;p&gt;Unfortunately, no, we won&apos;t be able to find out if it helps for some time.  We are doing a major upgrade to Lustre 2.4 over the next 2-3 weeks on the SCF machines, but this patch missed the window for inclusion in that distribution.  We will have to work it into the pipeline for the next upgrade.&lt;/p&gt;

&lt;p&gt;Can you explain a little more about what the patch will do?  I see &quot;Bulk requests are aborted upon reconnection by comparing connection count of request and export.&quot; in the patch comment.  What happens when the bulk requests are aborted?  Will the client transparently resend them?&lt;/p&gt;

&lt;p&gt;Also, what happens if there is more than one rpc outstanding?  Is the client able to reconnect in that case?&lt;/p&gt;</comment>
                            <comment id="72433" author="tappro" created="Wed, 27 Nov 2013 20:11:46 +0000"  >&lt;p&gt;The behavior is almost the same as before for bulks. Currently all pending bulks are aborted if new reconnect arrived from client and reconnect is refused with -EBUSY until there will be no more active requests, this is how it is handled before patch. With this patch we accept reconnect even if there are active requests and all bulks from last connection are aborted. Basically it is the same behavior as before, but now the connection count is checked instead of specific flag.&lt;br/&gt;
Client will resend aborted bulks, yes. Also the client is able to reconnect always, but resent bulk may stuck on original bulk until it is aborted.&lt;/p&gt;</comment>
                            <comment id="73297" author="adilger" created="Wed, 11 Dec 2013 17:22:50 +0000"  >&lt;p&gt;I think this patch introduced a timeout in conf-sanity (&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4349&quot; title=&quot;conf-sanity test_47: test failed to respond and timed out&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4349&quot;&gt;&lt;del&gt;LU-4349&lt;/del&gt;&lt;/a&gt;), so that needs to be addressed before this patch is introduced into the 2.4 release. &lt;/p&gt;</comment>
                            <comment id="76539" author="pjones" created="Sat, 8 Feb 2014 05:29:56 +0000"  >&lt;p&gt;Mike&lt;/p&gt;

&lt;p&gt;Could you please clarify what LLNL would need to port in order to use this fix on b2_4?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="76571" author="tappro" created="Mon, 10 Feb 2014 08:18:59 +0000"  >&lt;p&gt;Peter, the &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-793&quot; title=&quot;Reconnections should not be refused when there is a request in progress from this client.&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-793&quot;&gt;&lt;del&gt;LU-793&lt;/del&gt;&lt;/a&gt; and &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4349&quot; title=&quot;conf-sanity test_47: test failed to respond and timed out&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4349&quot;&gt;&lt;del&gt;LU-4349&lt;/del&gt;&lt;/a&gt; are needed.&lt;/p&gt;</comment>
                            <comment id="76790" author="pjones" created="Tue, 11 Feb 2014 22:50:49 +0000"  >&lt;p&gt;Backports to b2_4&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/#/c/9209/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/9209/&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/#/c/9210/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/9210/&lt;/a&gt;&lt;br/&gt;
&lt;a href=&quot;http://review.whamcloud.com/#/c/9211/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/9211/&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="78257" author="manish" created="Mon, 3 Mar 2014 19:09:19 +0000"  >&lt;p&gt;Hi &lt;/p&gt;

&lt;p&gt;We are seeing similar issues on Lustre 2.1.6 release, so is this patch compatible with 2.1.x release and if yes then can this be backported to branch b2_1. &lt;/p&gt;

&lt;p&gt;Thank You,&lt;br/&gt;
          Manish&lt;/p&gt;</comment>
                            <comment id="88048" author="morrone" created="Wed, 2 Jul 2014 22:14:21 +0000"  >&lt;p&gt;The patch&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/#/c/9211/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/9211/&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;was determined to not be needed for b2_4.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="10080">LU-7</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                                        </outwardlinks>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="17178">LU-2621</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="22348">LU-4349</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="22674">LU-4458</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="10080">LU-7</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="22377">LU-4359</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="16850">LU-2429</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="22729">LU-4480</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="12027" name="LU-793.jpg" size="40069" author="iurii" created="Tue, 6 Nov 2012 09:39:13 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzve3b:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>5914</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>