<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:28:47 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
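<!--
For example (a hedged sketch: the '/si/jira.issueviews:issue-xml/<KEY>/<KEY>.xml' path below is
assumed from the standard JIRA XML issue view and is not confirmed by this document), requesting

    https://jira.whamcloud.com/si/jira.issueviews:issue-xml/LU-2854/LU-2854.xml?field=key&field=summary

would return an XML document containing only the <key> and <summary> elements for this issue.
-->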
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-2854] hanging ll_ost_io and refuse connections</title>
                <link>https://jira.whamcloud.com/browse/LU-2854</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;We see hanging ll_ost_io threads, stuck somewhere in the trace below, and new connections are refused until these stuck threads finish. The only way we could recover was an OSS failover. We have captured backtraces; please have a look at them and see where the problem is.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Feb 20 09:22:46 oss01 kernel: Call Trace:
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff80062ff2&amp;gt;] thread_return+0x62/0xfe
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff80046c6e&amp;gt;] try_to_wake_up+0x472/0x484
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff800649e1&amp;gt;] __down+0x99/0xd8
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff8028b1d5&amp;gt;] __down_trylock+0x44/0x4e
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff800646c9&amp;gt;] __down_failed+0x35/0x3a
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff887fe406&amp;gt;] .text.lock.ldlm_pool+0x37/0x71 [ptlrpc]
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff80064614&amp;gt;] __down_read+0x12/0x92
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff8003f279&amp;gt;] shrink_slab+0xd0/0x153
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff800ce4ce&amp;gt;] zone_reclaim+0x235/0x2cd
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff8000985a&amp;gt;] __d_lookup+0xb0/0xff
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff8000a939&amp;gt;] get_page_from_freelist+0xbf/0x442
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff88b134d2&amp;gt;] filter_fid2dentry+0x512/0x740 [obdfilter]
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff8000f46f&amp;gt;] __alloc_pages+0x78/0x308
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff888a33ad&amp;gt;] kiblnd_launch_tx+0x16d/0x9d0 [ko2iblnd]
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff80025e20&amp;gt;] find_or_create_page+0x32/0x72
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff88b27445&amp;gt;] filter_get_page+0x35/0x70 [obdfilter]
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff88b27a81&amp;gt;] filter_preprw_read+0x601/0xd30 [obdfilter]
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff886ff873&amp;gt;] lnet_send+0x9a3/0x9d0 [lnet]
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff886fdaf7&amp;gt;] lnet_prep_send+0x67/0xb0 [lnet]
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff88b29f0c&amp;gt;] filter_preprw+0x1d5c/0x1dc0 [obdfilter]
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff88816c3a&amp;gt;] lustre_pack_reply_flags+0x86a/0x950 [ptlrpc]
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff8880eaa8&amp;gt;] ptlrpc_send_reply+0x5e8/0x600 [ptlrpc]
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff88acfac3&amp;gt;] ost_brw_read+0xb33/0x1a70 [ost]
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff88812ed5&amp;gt;] lustre_msg_get_opc+0x35/0xf0 [ptlrpc]
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff8008e7f9&amp;gt;] default_wake_function+0x0/0xe
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff88813088&amp;gt;] lustre_msg_check_version_v2+0x8/0x20 [ptlrpc]
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff88ad8363&amp;gt;] ost_handle+0x2e73/0x55b0 [ost]
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff80154b81&amp;gt;] __next_cpu+0x19/0x28
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff800778c5&amp;gt;] smp_send_reschedule+0x4e/0x53
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff888226b9&amp;gt;] ptlrpc_server_handle_request+0x989/0xe00 [ptlrpc]
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff88822e15&amp;gt;] ptlrpc_wait_event+0x2e5/0x310 [ptlrpc]
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff8008cc1e&amp;gt;] __wake_up_common+0x3e/0x68
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff88823da6&amp;gt;] ptlrpc_main+0xf66/0x1120 [ptlrpc]
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff8005dfb1&amp;gt;] child_rip+0xa/0x11
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff88822e40&amp;gt;] ptlrpc_main+0x0/0x1120 [ptlrpc]
Feb 20 09:22:46 oss01 kernel:  [&amp;lt;ffffffff8005dfa7&amp;gt;] child_rip+0x0/0x11
Feb 20 09:22:46 oss01 kernel: 
Feb 20 09:22:46 oss01 kernel: LustreError: dumping log to /tmp/lustre-log.1361319766.23768
Feb 20 09:35:48 oss01 kernel: Lustre: 13106:0:(service.c:808:ptlrpc_at_send_early_reply()) @@@ Couldn&apos;t add any time (5/-383), not sending early reply
Feb 20 09:35:48 oss01 kernel:   req@ffff8105cc619800 x1420503574224933/t0 o3-&amp;gt;28190515-07cf-b016-422b-1b05f42dd534@NET_0x50000c0a803ae_UUID:0/0 lens 448/400 e 4 to 0 dl 1361320553 ref 2 fl Interpret:/0/0 rc 0/0
Feb 20 09:35:48 oss01 kernel: Lustre: 13106:0:(service.c:808:ptlrpc_at_send_early_reply()) Skipped 1 previous similar message
Feb 20 09:35:49 oss01 kernel: Lustre: 8237:0:(service.c:808:ptlrpc_at_send_early_reply()) @@@ Couldn&apos;t add any time (5/-383), not sending early reply
Feb 20 09:35:49 oss01 kernel:   req@ffff81061e650400 x1420503605573149/t0 o3-&amp;gt;554c2172-ea07-8255-881e-847f196c5522@NET_0x50000c0a80399_UUID:0/0 lens 448/400 e 4 to 0 dl 1361320554 ref 2 fl Interpret:/
0/0 rc 0/0
Feb 20 09:38:06 oss01 kernel: Lustre: 10022:0:(ldlm_lib.c:574:target_handle_reconnect()) lustre-OST0000: 28190515-07cf-b016-422b-1b05f42dd534 reconnecting
Feb 20 09:38:06 oss01 kernel: Lustre: 10022:0:(ldlm_lib.c:574:target_handle_reconnect()) Skipped 65 previous similar messages
Feb 20 09:38:06 oss01 kernel: Lustre: 10022:0:(ldlm_lib.c:874:target_handle_connect()) lustre-OST0000: refuse reconnection from 28190515-07cf-b016-422b-1b05f42dd534@192.168.3.174@o2ib to 0xffff8102d408d600; still busy with 1 active RPCs
Feb 20 09:38:06 oss01 kernel: Lustre: 10022:0:(ldlm_lib.c:874:target_handle_connect()) Skipped 63 previous similar messages
Feb 20 09:38:06 oss01 kernel: LustreError: 10022:0:(ldlm_lib.c:1919:target_send_reply_msg()) @@@ processing error (-16)  req@ffff810181ffd000 x1420503574335450/t0 o8-&amp;gt;28190515-07cf-b016-422b-1b05f42dd534@NET_0x50000c0a803ae_UUID:0/0 lens 368/264 e 0 to 0 dl 1361320786 ref 1 fl Interpret:/0/0 rc -16/0
Feb 20 09:38:06 oss01 kernel: LustreError: 10022:0:(ldlm_lib.c:1919:target_send_reply_msg()) Skipped 63 previous similar messages
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="17680">LU-2854</key>
            <summary>hanging ll_ost_io and refuse connections</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="hongchao.zhang">Hongchao Zhang</assignee>
                                    <reporter username="ihara">Shuichi Ihara</reporter>
                        <labels>
                            <label>ptr</label>
                    </labels>
                <created>Fri, 22 Feb 2013 21:48:11 +0000</created>
                <updated>Sat, 19 Apr 2014 16:55:53 +0000</updated>
                            <resolved>Sat, 19 Apr 2014 16:55:53 +0000</resolved>
                                    <version>Lustre 1.8.8</version>
                                    <fixVersion>Lustre 1.8.9</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                <comments>
                            <comment id="52921" author="pjones" created="Sat, 23 Feb 2013 01:30:38 +0000"  >&lt;p&gt;Hongchao&lt;/p&gt;

&lt;p&gt;Could you please comment on this one?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="53050" author="ihara" created="Tue, 26 Feb 2013 17:40:43 +0000"  >&lt;p&gt;Hongchao&lt;br/&gt;
do you have a chance to have a look at the log files?&lt;/p&gt;</comment>
                            <comment id="53118" author="hongchao.zhang" created="Wed, 27 Feb 2013 11:25:23 +0000"  >&lt;p&gt;Hi, this issue is related to ldlm_pools_shrink and there could be a deadlock on some ldlm_lock, will need some more time to investigate it,&lt;br/&gt;
and will update its status soon.&lt;/p&gt;</comment>
                            <comment id="53249" author="hongchao.zhang" created="Mon, 4 Mar 2013 04:34:56 +0000"  >&lt;p&gt;this issue is a little similar with &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1520&quot; title=&quot;client fails MDS connection and stack threads on another client&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1520&quot;&gt;&lt;del&gt;LU-1520&lt;/del&gt;&lt;/a&gt;, which is caused by the contest of different calls of &quot;ldlm_pools_shrink&quot; under low memory situation.&lt;br/&gt;
could you please try the patch &lt;a href=&quot;http://review.whamcloud.com/#change,3270&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#change,3270&lt;/a&gt; in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1520&quot; title=&quot;client fails MDS connection and stack threads on another client&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1520&quot;&gt;&lt;del&gt;LU-1520&lt;/del&gt;&lt;/a&gt;? thanks!&lt;/p&gt;

&lt;p&gt;The log on OSS02 seems to be different from the others, and it could be related to lock contention on inode-&amp;gt;i_alloc_sem.&lt;/p&gt;</comment>
                            <comment id="53252" author="ihara" created="Mon, 4 Mar 2013 06:42:47 +0000"  >&lt;blockquote&gt;
&lt;p&gt;The log on OSS02 seems to be different from the others, and it could be related to lock contention on inode-&amp;gt;i_alloc_sem.&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;You mean this is not related to &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1520&quot; title=&quot;client fails MDS connection and stack threads on another client&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1520&quot;&gt;&lt;del&gt;LU-1520&lt;/del&gt;&lt;/a&gt;, but has a different root cause?&lt;/p&gt;</comment>
                            <comment id="53439" author="hongchao.zhang" created="Wed, 6 Mar 2013 10:02:56 +0000"  >&lt;p&gt;Yes, for the stack trace is different. But no cause is found until now, Is this issue reproducible?&lt;br/&gt;
What is the result of test with the patch from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1520&quot; title=&quot;client fails MDS connection and stack threads on another client&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1520&quot;&gt;&lt;del&gt;LU-1520&lt;/del&gt;&lt;/a&gt;?&lt;/p&gt;</comment>
                            <comment id="82023" author="ihara" created="Sat, 19 Apr 2014 15:02:07 +0000"  >&lt;p&gt;This patches have been landed in 1.8.9 and we upgraded system with it. So, please close this ticket.&lt;/p&gt;</comment>
                            <comment id="82026" author="pjones" created="Sat, 19 Apr 2014 16:55:53 +0000"  >&lt;p&gt;ok thanks Ihara&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="12265" name="oss01_messages" size="956828" author="ihara" created="Fri, 22 Feb 2013 21:48:11 +0000"/>
                            <attachment id="12266" name="oss02_messages" size="1023395" author="ihara" created="Fri, 22 Feb 2013 21:48:11 +0000"/>
                            <attachment id="12267" name="oss03_messages" size="831310" author="ihara" created="Fri, 22 Feb 2013 21:48:11 +0000"/>
                            <attachment id="12268" name="oss04_messages" size="953016" author="ihara" created="Fri, 22 Feb 2013 21:48:11 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvjpr:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>6915</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                </customfields>
    </item>
</channel>
</rss>