<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:43:12 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-11362] sanity test_156: timeout loop in ptlrpc_check_set()</title>
                <link>https://jira.whamcloud.com/browse/LU-11362</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for John Hammond &amp;lt;jhammond@whamcloud.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://testing.whamcloud.com/test_sets/8ee96d5e-b56c-11e8-a7de-52540065bddc&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.whamcloud.com/test_sets/8ee96d5e-b56c-11e8-a7de-52540065bddc&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;test_156 failed with the following error:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Timeout occurred after 166 mins, last suite running was sanity, restarting cluster to continue tests
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;dd (pid 6412) is looping in &lt;tt&gt;ptlrpc_check_set()&lt;/tt&gt;:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[10125.205230] dd              S ffff968d3a832f70     0  6412  31766 0x00000080
[10125.206015] Call Trace:
[10125.206273]  [&amp;lt;ffffffffac914029&amp;gt;] schedule+0x29/0x70
[10125.206797]  [&amp;lt;ffffffffac9118d4&amp;gt;] schedule_timeout+0x174/0x2c0
[10125.207550]  [&amp;lt;ffffffffac2a3750&amp;gt;] ? internal_add_timer+0x70/0x70
[10125.208213]  [&amp;lt;ffffffffc0af5650&amp;gt;] ? ptlrpc_init_rq_pool+0x110/0x110 [ptlrpc]
[10125.208966]  [&amp;lt;ffffffffc0aff3b0&amp;gt;] ptlrpc_set_wait+0x480/0x790 [ptlrpc]
[10125.209703]  [&amp;lt;ffffffffac2cf670&amp;gt;] ? wake_up_state+0x20/0x20
[10125.210355]  [&amp;lt;ffffffffc0aff73d&amp;gt;] ptlrpc_queue_wait+0x7d/0x220 [ptlrpc]
[10125.211068]  [&amp;lt;ffffffffc0ae46a2&amp;gt;] ldlm_cli_enqueue+0x3d2/0x920 [ptlrpc]
[10125.211774]  [&amp;lt;ffffffffc0adf7c0&amp;gt;] ? ldlm_expired_completion_wait+0x220/0x220 [ptlrpc]
[10125.212679]  [&amp;lt;ffffffffc0c7e710&amp;gt;] ? osc_lock_lockless_cancel+0xe0/0xe0 [osc]
[10125.213491]  [&amp;lt;ffffffffc0c7dad0&amp;gt;] ? osc_lock_upcall+0x580/0x580 [osc]
[10125.214167]  [&amp;lt;ffffffffc0c74965&amp;gt;] osc_enqueue_base+0x2b5/0x6a0 [osc]
[10125.214844]  [&amp;lt;ffffffffc0c7d550&amp;gt;] ? osc_lock_lvb_update+0x330/0x330 [osc]
[10125.215616]  [&amp;lt;ffffffffc0c7f17b&amp;gt;] osc_lock_enqueue+0x38b/0x840 [osc]
[10125.216297]  [&amp;lt;ffffffffc0c7d550&amp;gt;] ? osc_lock_lvb_update+0x330/0x330 [osc]
[10125.217058]  [&amp;lt;ffffffffc0935de5&amp;gt;] cl_lock_enqueue+0x65/0x120 [obdclass]
[10125.217778]  [&amp;lt;ffffffffc0cd8285&amp;gt;] lov_lock_enqueue+0x95/0x150 [lov]
[10125.218497]  [&amp;lt;ffffffffc0935de5&amp;gt;] cl_lock_enqueue+0x65/0x120 [obdclass]
[10125.219225]  [&amp;lt;ffffffffc0936377&amp;gt;] cl_lock_request+0x67/0x1f0 [obdclass]
[10125.219931]  [&amp;lt;ffffffffc093a26b&amp;gt;] cl_io_lock+0x2bb/0x3d0 [obdclass]
[10125.220688]  [&amp;lt;ffffffffc093a5fb&amp;gt;] cl_io_loop+0x11b/0xc70 [obdclass]
[10125.221444]  [&amp;lt;ffffffffc0d30e02&amp;gt;] ll_file_io_generic+0x4e2/0xd10 [lustre]
[10125.222178]  [&amp;lt;ffffffffc0d31b82&amp;gt;] ll_file_aio_write+0x372/0x540 [lustre]
[10125.222880]  [&amp;lt;ffffffffc0d31df4&amp;gt;] ll_file_write+0xa4/0x170 [lustre]
[10125.223591]  [&amp;lt;ffffffffac41b490&amp;gt;] vfs_write+0xc0/0x1f0
[10125.224151]  [&amp;lt;ffffffffac9206e1&amp;gt;] ? system_call_after_swapgs+0xae/0x146
[10125.224845]  [&amp;lt;ffffffffac41c2bf&amp;gt;] SyS_write+0x7f/0xf0
[10125.225434]  [&amp;lt;ffffffffac9206e1&amp;gt;] ? system_call_after_swapgs+0xae/0x146
[10125.226136]  [&amp;lt;ffffffffac920795&amp;gt;] system_call_fastpath+0x1c/0x21
[10125.226762]  [&amp;lt;ffffffffac9206e1&amp;gt;] ? system_call_after_swapgs+0xae/0x146
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;&#160;&lt;br/&gt;
Client logs show that dd goes through 2000+ one-second loops in &lt;tt&gt;ptlrpc_set_wait()&lt;/tt&gt;:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00010000:00010000:1.0:1536619472.378230:0:6412:0:(ldlm_request.c:942:ldlm_cli_enqueue()) ### client-side enqueue START, flags 0x40000000 ns: lustre-OST0002-osc-ffff968d3a345000 lock: ffff968d26d2c480/0x17ff4219e8fed853 lrc: 3/0,1 mode: --/PW res: [0x2ff0:0x0:0x0].0x0 rrc: 2 type: EXT [0-&amp;gt;4095] (req 0-&amp;gt;4095) flags: 0x0 nid: local remote: 0x0 expref: -99 pid: 6412 timeout: 0 lvb_type: 1
00010000:00010000:1.0:1536619472.378239:0:6412:0:(ldlm_request.c:1014:ldlm_cli_enqueue()) ### sending request ns: lustre-OST0002-osc-ffff968d3a345000 lock: ffff968d26d2c480/0x17ff4219e8fed853 lrc: 3/0,1 mode: --/PW res: [0x2ff0:0x0:0x0].0x0 rrc: 2 type: EXT [0-&amp;gt;4095] (req 0-&amp;gt;4095) flags: 0x0 nid: local remote: 0x0 expref: -99 pid: 6412 timeout: 0 lvb_type: 1
00000100:00080000:1.0:1536619472.378247:0:6412:0:(client.c:1569:ptlrpc_send_new_req()) @@@ req waiting for recovery: (FULL != CONNECTING)  req@ffff968d218dcc00 x1611261872492656/t0(0) o101-&amp;gt;lustre-OST0002-osc-ffff968d3a345000@10.9.5.125@tcp:28/4 lens 328/400 e 0 to 0 dl 0 ref 2 fl Rpc:W/0/ffffffff rc 0/-1
00000100:00100000:1.0:1536619472.378251:0:6412:0:(client.c:2347:ptlrpc_set_wait()) set ffff968d23136180 going to sleep for 0 seconds
00000100:00100000:1.0:1536619473.377745:0:6412:0:(client.c:2347:ptlrpc_set_wait()) set ffff968d23136180 going to sleep for 0 seconds
00010000:00010000:1.0:1536619473.852020:0:18075:0:(ldlm_lockd.c:1669:ldlm_handle_bl_callback()) ### client blocking AST callback handler ns: lustre-OST0001-osc-ffff968d3a345000 lock: ffff968cf7975440/0x17ff4219e8fed845 lrc: 2/0,0 mode: PR/PR res: [0x2fd0:0x0:0x0].0x0 rrc: 2 type: EXT [0-&amp;gt;18446744073709551615] (req 0-&amp;gt;18446744073709551615) flags: 0x420000010000 nid: local remote: 0x61c83a90b60e6c16 expref: -99 pid: 6411 timeout: 0 lvb_type: 1
00010000:00010000:1.0:1536619473.852031:0:18075:0:(ldlm_lockd.c:1700:ldlm_handle_bl_callback()) Lock ffff968cf7975440 already unused, calling callback (ffffffffc0c7e710)
00010000:00010000:1.0:1536619473.852034:0:18075:0:(ldlm_request.c:1251:ldlm_cli_cancel_local()) ### client-side cancel ns: lustre-OST0001-osc-ffff968d3a345000 lock: ffff968cf7975440/0x17ff4219e8fed845 lrc: 3/0,0 mode: PR/PR res: [0x2fd0:0x0:0x0].0x0 rrc: 2 type: EXT [0-&amp;gt;18446744073709551615] (req 0-&amp;gt;18446744073709551615) flags: 0x428400010000 nid: local remote: 0x61c83a90b60e6c16 expref: -99 pid: 6411 timeout: 0 lvb_type: 1
00010000:00010000:1.0:1536619473.852117:0:18075:0:(ldlm_request.c:1310:ldlm_cancel_pack()) ### packing ns: lustre-OST0001-osc-ffff968d3a345000 lock: ffff968cf7975440/0x17ff4219e8fed845 lrc: 2/0,0 mode: --/PR res: [0x2fd0:0x0:0x0].0x0 rrc: 2 type: EXT [0-&amp;gt;18446744073709551615] (req 0-&amp;gt;18446744073709551615) flags: 0x4c29400010000 nid: local remote: 0x61c83a90b60e6c16 expref: -99 pid: 6411 timeout: 0 lvb_type: 1
00010000:00010000:1.0:1536619473.852129:0:18075:0:(ldlm_request.c:1314:ldlm_cancel_pack()) 1 locks packed
00010000:00010000:1.0:1536619473.852141:0:18075:0:(ldlm_lockd.c:1709:ldlm_handle_bl_callback()) ### client blocking callback handler END ns: lustre-OST0001-osc-ffff968d3a345000 lock: ffff968cf7975440/0x17ff4219e8fed845 lrc: 1/0,0 mode: --/PR res: [0x2fd0:0x0:0x0].0x0 rrc: 2 type: EXT [0-&amp;gt;18446744073709551615] (req 0-&amp;gt;18446744073709551615) flags: 0x4c29400010000 nid: local remote: 0x61c83a90b60e6c16 expref: -99 pid: 6411 timeout: 0 lvb_type: 1
00010000:00010000:1.0:1536619473.852147:0:18075:0:(ldlm_lock.c:197:ldlm_lock_put()) ### final lock_put on destroyed lock, freeing it. ns: lustre-OST0001-osc-ffff968d3a345000 lock: ffff968cf7975440/0x17ff4219e8fed845 lrc: 0/0,0 mode: --/PR res: [0x2fd0:0x0:0x0].0x0 rrc: 2 type: EXT [0-&amp;gt;18446744073709551615] (req 0-&amp;gt;18446744073709551615) flags: 0x4c29400010000 nid: local remote: 0x61c83a90b60e6c16 expref: -99 pid: 6411 timeout: 0 lvb_type: 1
00000100:00100000:1.0:1536619473.852166:0:1881:0:(client.c:1625:ptlrpc_send_new_req()) Sending RPC pname:cluuid:pid:xid:nid:opc ptlrpcd_01_01:ac35fb57-abc0-fa1f-db48-e4504a124ce3:1881:1611261872492672:10.9.5.125@tcp:103
00000100:00100000:1.0:1536619473.852624:0:1881:0:(client.c:2053:ptlrpc_check_set()) Completed RPC pname:cluuid:pid:xid:nid:opc ptlrpcd_01_01:ac35fb57-abc0-fa1f-db48-e4504a124ce3:1881:1611261872492672:10.9.5.125@tcp:103
00000100:00100000:1.0:1536619474.377727:0:6412:0:(client.c:2347:ptlrpc_set_wait()) set ffff968d23136180 going to sleep for 0 seconds
00000100:00100000:1.0:1536619475.377720:0:6412:0:(client.c:2347:ptlrpc_set_wait()) set ffff968d23136180 going to sleep for 0 seconds
00000100:00100000:1.0:1536619476.377721:0:6412:0:(client.c:2347:ptlrpc_set_wait()) set ffff968d23136180 going to sleep for 0 seconds
00000100:00100000:1.0:1536619477.377737:0:6412:0:(client.c:2347:ptlrpc_set_wait()) set ffff968d23136180 going to sleep for 0 seconds
00000100:00100000:1.0:1536619478.377757:0:6412:0:(client.c:2347:ptlrpc_set_wait()) set ffff968d23136180 going to sleep for 0 seconds
00000100:00100000:1.0:1536619479.377746:0:6412:0:(client.c:2347:ptlrpc_set_wait()) set ffff968d23136180 going to sleep for 0 seconds
00000100:00100000:1.0:1536619480.377722:0:6412:0:(client.c:2347:ptlrpc_set_wait()) set ffff968d23136180 going to sleep for 0 seconds
00000100:00100000:1.0:1536619481.377735:0:6412:0:(client.c:2347:ptlrpc_set_wait()) set ffff968d23136180 going to sleep for 0 seconds
00000100:00100000:1.0:1536619482.377718:0:6412:0:(client.c:2347:ptlrpc_set_wait()) set ffff968d23136180 going to sleep for 0 seconds
00000100:00100000:1.0:1536619483.377739:0:6412:0:(client.c:2347:ptlrpc_set_wait()) set ffff968d23136180 going to sleep for 0 seconds
00000100:00100000:1.0:1536619484.377729:0:6412:0:(client.c:2347:ptlrpc_set_wait()) set ffff968d23136180 going to sleep for 0 seconds
00000100:00100000:1.0:1536619485.377712:0:6412:0:(client.c:2347:ptlrpc_set_wait()) set ffff968d23136180 going to sleep for 0 seconds
00000100:00100000:1.0:1536619486.377724:0:6412:0:(client.c:2347:ptlrpc_set_wait()) set ffff968d23136180 going to sleep for 0 seconds
...
00000100:00100000:1.0:1536621583.377721:0:6412:0:(client.c:2347:ptlrpc_set_wait()) set ffff968d23136180 going to sleep for 0 seconds
00000100:00100000:1.0:1536621584.377725:0:6412:0:(client.c:2347:ptlrpc_set_wait()) set ffff968d23136180 going to sleep for 0 seconds
00000100:00100000:1.0:1536621585.377723:0:6412:0:(client.c:2347:ptlrpc_set_wait()) set ffff968d23136180 going to sleep for 0 seconds
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;VVVVVVV DO NOT REMOVE LINES BELOW, Added by Maloo for auto-association VVVVVVV&lt;br/&gt;
 sanity test_156 - Timeout occurred after 166 mins, last suite running was sanity, restarting cluster to continue tests&lt;/p&gt;</description>
                <environment></environment>
        <key id="53273">LU-11362</key>
            <summary>sanity test_156: timeout loop in ptlrpc_check_set()</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="1" iconUrl="https://jira.whamcloud.com/images/icons/statuses/open.png" description="The issue is open and ready for the assignee to start work on it.">Open</status>
                    <statusCategory id="2" key="new" colorName="default"/>
                                    <resolution id="-1">Unresolved</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Tue, 11 Sep 2018 13:55:50 +0000</created>
                <updated>Thu, 13 Sep 2018 05:34:39 +0000</updated>
                                                                                <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="233325" author="jhammond" created="Tue, 11 Sep 2018 14:02:08 +0000"  >&lt;p&gt;Alex, it looks like &lt;a href=&quot;https://review.whamcloud.com/16682&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/16682&lt;/a&gt; (&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-7236&quot; title=&quot;connections on demand&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-7236&quot;&gt;&lt;del&gt;LU-7236&lt;/del&gt;&lt;/a&gt; ptlrpc: idle connections can disconnect) was the last to touch this code. Can you comment?&lt;/p&gt;</comment>
                            <comment id="233326" author="bzzz" created="Tue, 11 Sep 2018 14:10:41 +0000"  >&lt;p&gt;From the logs the cause isn&apos;t 100% clear, but the symptoms point to &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11128&quot; title=&quot;replay-single test timeout&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11128&quot;&gt;&lt;del&gt;LU-11128&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="52659">LU-11128</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i0026n:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>