<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:10:06 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-755] lustre had panic on network error (stale ZQ entry)</title>
                <link>https://jira.whamcloud.com/browse/LU-755</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;While running iozone tests, the system panicked:&lt;br/&gt;
@tcp has timed out for slow reply: &lt;span class=&quot;error&quot;&gt;&amp;#91;sent 1317906741&amp;#93;&lt;/span&gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;real_sent 1317906741&amp;#93;&lt;/span&gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;current 1317906748&amp;#93;&lt;/span&gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;deadline 7s&amp;#93;&lt;/span&gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;delay 0s&amp;#93;&lt;/span&gt;  req@ffff88010df32400 x1381925181718579/t5108(5108) o-1-&amp;gt;testfs-OST0000_UUID@192.168.123.12@tcp:6/4 lens 456/416 e 0 to 1 dl 1317906748 ref 3 fl Bulk:RX/ffffffff/ffffffff rc 0/-1&lt;br/&gt;
[ 4790.534571] Lustre: testfs-OST0000-osc-ffff88010e1a9400: Connection to service testfs-OST0000 via nid 192.168.123.12@tcp was lost; in progress operations using this service will wait for recovery to complete.&lt;br/&gt;
[ 4790.567408] Lustre: testfs-OST0000-osc-ffff88010e1a9400: Connection restored to service testfs-OST0000 using nid 192.168.123.12@tcp.&lt;br/&gt;
[ 4844.512099] LustreError: 31786:0:(socklnd_cb.c:2518:ksocknal_check_peer_timeouts()) Total 1 stale ZC_REQs for peer 192.168.123.12@tcp detected; the oldest(ffff8800daf1d600) timed out 10 secs ago, resid: 0, wmem: 0&lt;br/&gt;
[ 4844.521468] LustreError: 31786:0:(events.c:194:client_bulk_callback()) event type 0, status -5, desc ffff8800ada53200&lt;br/&gt;
[ 4844.526766] LustreError: 31789:0:(client.c:1695:ptlrpc_check_set()) @@@ bulk transfer failed  req@ffff88010df32400 x1381925181718581/t5108(5108) o-1-&amp;gt;testfs-OST0000_UUID@192.168.123.12@tcp:6/4 lens 456/416 e 0 to 0 dl 1317906748 ref 2 fl Bulk:RS/ffffffff/ffffffff rc -11/-1&lt;br/&gt;
[ 4844.536035] LustreError: 31789:0:(client.c:1696:ptlrpc_check_set()) LBUG&lt;/p&gt;

&lt;p&gt;The panic was caused by an error in the client bulk callback, which wakes up the request and unregisters the bulk transfer, but does not mark the request as failed.&lt;/p&gt;

&lt;p&gt;crash&amp;gt; struct ptlrpc_request ffff88010df32400&lt;br/&gt;
struct ptlrpc_request {&lt;br/&gt;
...&lt;br/&gt;
  rq_intr = 0, &lt;br/&gt;
  rq_replied = 1, &lt;br/&gt;
  rq_err = 0, &lt;br/&gt;
  rq_timedout = 0, &lt;br/&gt;
  rq_resend = 1, &lt;br/&gt;
  rq_restart = 0, &lt;br/&gt;
  rq_replay = 0, &lt;br/&gt;
  rq_no_resend = 0, &lt;br/&gt;
  rq_waiting = 0, &lt;br/&gt;
  rq_receiving_reply = 0, &lt;br/&gt;
  rq_no_delay = 0, &lt;br/&gt;
  rq_net_err = 0, &lt;br/&gt;
  rq_wait_ctx = 0, &lt;br/&gt;
  rq_early = 0, &lt;br/&gt;
  rq_must_unlink = 0, &lt;br/&gt;
  rq_fake = 0, &lt;br/&gt;
  rq_memalloc = 0, &lt;br/&gt;
  rq_packed_final = 0, &lt;br/&gt;
  rq_hp = 0, &lt;br/&gt;
  rq_at_linked = 0, &lt;br/&gt;
  rq_reply_truncate = 0, &lt;br/&gt;
  rq_committed = 0, &lt;br/&gt;
  rq_invalid_rqset = 0, &lt;br/&gt;
  rq_phase = 3955285506, &lt;br/&gt;
  rq_next_phase = 3955285506, &lt;br/&gt;
  rq_refcount = {&lt;br/&gt;
    counter = 2&lt;br/&gt;
  }, &lt;br/&gt;
...&lt;br/&gt;
crash&amp;gt; p *((struct ptlrpc_bulk_desc *)0xffff8800ada53200)&lt;br/&gt;
$9 = {&lt;br/&gt;
  bd_success = 0, &lt;br/&gt;
  bd_network_rw = 0, &lt;br/&gt;
  bd_type = 0, &lt;br/&gt;
  bd_registered = 1, &lt;br/&gt;
  bd_lock = {&lt;br/&gt;
    raw_lock = {&lt;br/&gt;
      slock = 0&lt;br/&gt;
    }&lt;br/&gt;
  }, &lt;br/&gt;
  bd_import_generation = 0, &lt;br/&gt;
  bd_export = 0x0, &lt;br/&gt;
  bd_import = 0xffff88010ed33000, &lt;br/&gt;
  bd_portal = 8, &lt;br/&gt;
  bd_req = 0xffff88010df32400, &lt;/p&gt;

&lt;p&gt;So the panic occurred on the same bulk descriptor that failed in client_bulk_callback.&lt;/p&gt;</description>
                <environment>RHEL6/Lustre 2.1.0</environment>
        <key id="12081">LU-755</key>
            <summary>lustre had panic on network error (stale ZQ entry)</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="5">Cannot Reproduce</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="shadow">Alexey Lyashkov</reporter>
                        <labels>
                    </labels>
                <created>Wed, 12 Oct 2011 08:13:36 +0000</created>
                <updated>Mon, 29 May 2017 02:43:20 +0000</updated>
                            <resolved>Mon, 29 May 2017 02:43:20 +0000</resolved>
                                    <version>Lustre 2.1.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>1</watches>
                                                                            <comments>
                            <comment id="43202" author="cfaber" created="Tue, 14 Aug 2012 12:30:56 +0000"  >&lt;p&gt;This issue continues to be a problem in our mostly up to date 2.1.2 release.&lt;/p&gt;

&lt;p&gt;-cf&lt;/p&gt;</comment>
                            <comment id="197337" author="adilger" created="Mon, 29 May 2017 02:43:20 +0000"  >&lt;p&gt;Close old ticket.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzw1ev:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>10310</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>