<?xml version="1.0" encoding="UTF-8"?>
<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:57:19 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-12979] OSS hung during failback</title>
                <link>https://jira.whamcloud.com/browse/LU-12979</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;During OSS failover testing, the failover pair node hung during failback after system running for about 35 hours&lt;/p&gt;

&lt;p&gt;soak-7&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[17882.991912] Lustre: soaked-OST0006: deleting orphan objects from 0x0:1148098 to 0x0:1149825
[17884.486957] Lustre: Failing over soaked-OST0002
[17884.728160] Lustre: server umount soaked-OST0002 complete
[17895.395319] Lustre: Failing over soaked-OST000a
[17895.413977] Lustre: server umount soaked-OST000a complete
[17896.790424] LustreError: 137-5: soaked-OST0002_UUID: not available for connect from 192.168.1.124@o2ib (no target). If you are running an HA pair che
ck that the target is mounted on the other server.
[17896.810226] LustreError: Skipped 437 previous similar messages
[17902.199718] Lustre: Failing over soaked-OST0006
[17902.346578] Lustre: server umount soaked-OST0006 complete
[17908.627101] Lustre: Failing over soaked-OST000e
[17908.758771] Lustre: server umount soaked-OST000e complete
[17923.789823] Lustre: soaked-OST0007: Export ffff9724a6452c00 already connecting from 192.168.1.118@o2ib
[17931.042012] Lustre: soaked-OST0007: Export ffff9724d6813400 already connecting from 192.168.1.126@o2ib
[17937.914063] Lustre: soaked-OST0007: Export ffff9728a5360800 already connecting from 192.168.1.130@o2ib
[17940.946368] Lustre: soaked-OST0007: Export ffff9728d851d800 already connecting from 192.168.1.137@o2ib
[17940.956765] Lustre: Skipped 1 previous similar message
[17946.942948] Lustre: soaked-OST0007: Export ffff9728d81df400 already connecting from 192.168.1.124@o2ib
[17973.969095] Lustre: soaked-OST0007: Export ffff9724a6452c00 already connecting from 192.168.1.118@o2ib
[17973.979489] Lustre: Skipped 13 previous similar messages
[17991.122858] Lustre: soaked-OST0007: Export ffff9728d851d800 already connecting from 192.168.1.137@o2ib
[17991.133257] Lustre: Skipped 3 previous similar messages
[18024.148290] Lustre: soaked-OST0007: Export ffff9724a6452c00 already connecting from 192.168.1.118@o2ib
[18024.158682] Lustre: Skipped 14 previous similar messages
[18064.712164] Lustre: ll_ost00_008: service thread pid 18052 was inactive for 200.107 seconds. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:
[18064.733418] Pid: 18052, comm: ll_ost00_008 3.10.0-1062.1.1.el7_lustre.x86_64 #1 SMP Fri Nov 8 18:37:40 UTC 2019
[18064.744672] Call Trace:
[18064.747423]  [&amp;lt;ffffffffc091e2d5&amp;gt;] cv_wait_common+0x125/0x150 [spl]
[18064.754345]  [&amp;lt;ffffffffc091e315&amp;gt;] __cv_wait+0x15/0x20 [spl]
[18064.760577]  [&amp;lt;ffffffffc0c992ef&amp;gt;] txg_wait_synced+0xef/0x140 [zfs]
[18064.767530]  [&amp;lt;ffffffffc0c4ecc5&amp;gt;] dmu_tx_wait+0x275/0x3c0 [zfs]
[18064.774174]  [&amp;lt;ffffffffc0c4eea2&amp;gt;] dmu_tx_assign+0x92/0x490 [zfs]
[18064.780910]  [&amp;lt;ffffffffc16c2fd9&amp;gt;] osd_trans_start+0x199/0x440 [osd_zfs]
[18064.788312]  [&amp;lt;ffffffffc14e8430&amp;gt;] tgt_server_data_update+0x3c0/0x510 [ptlrpc]
[18064.796347]  [&amp;lt;ffffffffc14ea40d&amp;gt;] tgt_client_del+0x29d/0x6a0 [ptlrpc]
[18064.803581]  [&amp;lt;ffffffffc180523c&amp;gt;] ofd_obd_disconnect+0x1ac/0x220 [ofd]
[18064.810885]  [&amp;lt;ffffffffc144f176&amp;gt;] target_handle_disconnect+0xd6/0x450 [ptlrpc]
[18064.818985]  [&amp;lt;ffffffffc14f0d38&amp;gt;] tgt_disconnect+0x58/0x170 [ptlrpc]
[18064.826127]  [&amp;lt;ffffffffc14f983a&amp;gt;] tgt_request_handle+0x98a/0x1630 [ptlrpc]
[18064.833851]  [&amp;lt;ffffffffc149ba96&amp;gt;] ptlrpc_server_handle_request+0x256/0xb10 [ptlrpc]
[18064.842439]  [&amp;lt;ffffffffc149f5cc&amp;gt;] ptlrpc_main+0xbac/0x1540 [ptlrpc]
[18064.849482]  [&amp;lt;ffffffff93cc50d1&amp;gt;] kthread+0xd1/0xe0
[18064.854943]  [&amp;lt;ffffffff9438cd37&amp;gt;] ret_from_fork_nospec_end+0x0/0x39
[18064.861953]  [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
[18065.736218] Lustre: ll_ost00_006: service thread pid 18039 was inactive for 200.435 seconds. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:
[18065.757478] Pid: 18039, comm: ll_ost00_006 3.10.0-1062.1.1.el7_lustre.x86_64 #1 SMP Fri Nov 8 18:37:40 UTC 2019
[18065.768742] Call Trace:
[18065.771481]  [&amp;lt;ffffffffc091e2d5&amp;gt;] cv_wait_common+0x125/0x150 [spl]
[18065.778397]  [&amp;lt;ffffffffc091e315&amp;gt;] __cv_wait+0x15/0x20 [spl]
[18065.784636]  [&amp;lt;ffffffffc0c992ef&amp;gt;] txg_wait_synced+0xef/0x140 [zfs]
[18065.791576]  [&amp;lt;ffffffffc0c4ecc5&amp;gt;] dmu_tx_wait+0x275/0x3c0 [zfs]
[18065.798219]  [&amp;lt;ffffffffc0c4eea2&amp;gt;] dmu_tx_assign+0x92/0x490 [zfs]
[18065.804957]  [&amp;lt;ffffffffc16c2fd9&amp;gt;] osd_trans_start+0x199/0x440 [osd_zfs]
[18065.812358]  [&amp;lt;ffffffffc14e8430&amp;gt;] tgt_server_data_update+0x3c0/0x510 [ptlrpc]
[18065.820373]  [&amp;lt;ffffffffc14ea40d&amp;gt;] tgt_client_del+0x29d/0x6a0 [ptlrpc]
[18065.827613]  [&amp;lt;ffffffffc180523c&amp;gt;] ofd_obd_disconnect+0x1ac/0x220 [ofd]
[18065.834923]  [&amp;lt;ffffffffc144f176&amp;gt;] target_handle_disconnect+0xd6/0x450 [ptlrpc]
[18065.843022]  [&amp;lt;ffffffffc14f0d38&amp;gt;] tgt_disconnect+0x58/0x170 [ptlrpc]
[18065.850165]  [&amp;lt;ffffffffc14f983a&amp;gt;] tgt_request_handle+0x98a/0x1630 [ptlrpc]
[18065.857886]  [&amp;lt;ffffffffc149ba96&amp;gt;] ptlrpc_server_handle_request+0x256/0xb10 [ptlrpc]
[18065.866478]  [&amp;lt;ffffffffc149f5cc&amp;gt;] ptlrpc_main+0xbac/0x1540 [ptlrpc]
[18065.873511]  [&amp;lt;ffffffff93cc50d1&amp;gt;] kthread+0xd1/0xe0
[18065.878972]  [&amp;lt;ffffffff9438cd37&amp;gt;] ret_from_fork_nospec_end+0x0/0x39
[18065.885982]  [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
...

[21128.204987] Lustre: Skipped 1 previous similar message
[21138.287341] Lustre: soaked-OST0002: Not available for connect from 192.168.1.137@o2ib (not set up)
[21138.297347] Lustre: Skipped 2 previous similar messages
[21167.633849] Lustre: soaked-OST0002: Not available for connect from 192.168.1.111@o2ib (not set up)
[21167.643858] Lustre: Skipped 9 previous similar messages
[21195.356109] Lustre: 23774:0:(service.c:1442:ptlrpc_at_send_early_reply()) @@@ Could not add any time (5/5), not sending early reply  req@ffff97278de0e780 x1650366648233984/t0(0) o5-&amp;gt;soaked-MDT0002-mdtlov_UUID@192.168.1.110@o2ib:545/0 lens 432/432 e 1 to 0 dl 1573925640 ref 2 fl Interpret:/0/0 rc 0/0 job:&apos;&apos;
[21195.386391] Lustre: 23774:0:(service.c:1442:ptlrpc_at_send_early_reply()) Skipped 3 previous similar messages
[21199.340302] ptlrpc_watchdog_fire: 3 callbacks suppressed
[21199.346241] Lustre: ll_ost00_024: service thread pid 5497 was inactive for 1200.379 seconds. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:
[21199.367494] Pid: 5497, comm: ll_ost00_024 3.10.0-1062.1.1.el7_lustre.x86_64 #1 SMP Fri Nov 8 18:37:40 UTC 2019
[21199.378663] Call Trace:
[21199.381399]  [&amp;lt;ffffffffc1801873&amp;gt;] ofd_create_hdl+0xcc3/0x2100 [ofd]
[21199.388410]  [&amp;lt;ffffffffc14f983a&amp;gt;] tgt_request_handle+0x98a/0x1630 [ptlrpc]
[21199.396155]  [&amp;lt;ffffffffc149ba96&amp;gt;] ptlrpc_server_handle_request+0x256/0xb10 [ptlrpc]
[21199.404748]  [&amp;lt;ffffffffc149f5cc&amp;gt;] ptlrpc_main+0xbac/0x1540 [ptlrpc]
[21199.411790]  [&amp;lt;ffffffff93cc50d1&amp;gt;] kthread+0xd1/0xe0
[21199.417239]  [&amp;lt;ffffffff9438cd37&amp;gt;] ret_from_fork_nospec_end+0x0/0x39
[21199.424249]  [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
[21203.436516] Pid: 18121, comm: ll_ost01_023 3.10.0-1062.1.1.el7_lustre.x86_64 #1 SMP Fri Nov 8 18:37:40 UTC 2019
[21203.447781] Call Trace:
[21203.450514]  [&amp;lt;ffffffffc1801873&amp;gt;] ofd_create_hdl+0xcc3/0x2100 [ofd]
[21203.457536]  [&amp;lt;ffffffffc14f983a&amp;gt;] tgt_request_handle+0x98a/0x1630 [ptlrpc]
[21203.465297]  [&amp;lt;ffffffffc149ba96&amp;gt;] ptlrpc_server_handle_request+0x256/0xb10 [ptlrpc]
[21203.473893]  [&amp;lt;ffffffffc149f5cc&amp;gt;] ptlrpc_main+0xbac/0x1540 [ptlrpc]
[21203.480928]  [&amp;lt;ffffffff93cc50d1&amp;gt;] kthread+0xd1/0xe0
[21203.486390]  [&amp;lt;ffffffff9438cd37&amp;gt;] ret_from_fork_nospec_end+0x0/0x39
[21203.493401]  [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
[21217.812639] Lustre: soaked-OST0002: Not available for connect from 192.168.1.111@o2ib (not set up)
[21217.822646] Lustre: Skipped 23 previous similar messages

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>b2_13-ib build #2</environment>
        <key id="57407">LU-12979</key>
            <summary>OSS hung during failback</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="6" iconUrl="https://jira.whamcloud.com/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="5">Cannot Reproduce</resolution>
                                        <assignee username="bzzz">Alex Zhuravlev</assignee>
                                    <reporter username="sarah">Sarah Liu</reporter>
                        <labels>
                            <label>soak</label>
                    </labels>
                <created>Mon, 18 Nov 2019 18:49:46 +0000</created>
                <updated>Thu, 16 Apr 2020 07:33:46 +0000</updated>
                            <resolved>Thu, 16 Apr 2020 07:33:46 +0000</resolved>
                                    <version>Lustre 2.13.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="258545" author="jgmitter" created="Wed, 20 Nov 2019 15:09:14 +0000"  >&lt;p&gt;Hi Alex,&lt;/p&gt;

&lt;p&gt;Could you take a look to see what may be going on here?&lt;/p&gt;

&lt;p&gt;Thanks.&lt;br/&gt;
Joe&lt;/p&gt;</comment>
                            <comment id="258546" author="bzzz" created="Wed, 20 Nov 2019 15:11:53 +0000"  >&lt;p&gt;it would be very helpful to get full stack traces.. I think I saw something similar in maloo recently, but also w/o full traces..&lt;/p&gt;</comment>
                            <comment id="258645" author="sarah" created="Thu, 21 Nov 2019 17:25:16 +0000"  >&lt;p&gt;Sorry, there is no full stack. I will try and see if this issue can be reproduce and get the stack&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i00plj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>