<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:26:38 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-9488] soft lockup in osd_inode_iteration()</title>
                <link>https://jira.whamcloud.com/browse/LU-9488</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Restarted testing with latest master, 3577&lt;br/&gt;
Completed two failovers with lfsck turned off.&lt;br/&gt;
Restarted with lfsck turned on&lt;br/&gt;
soak-5 (OSS) completed failover:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;2017-05-09 20:08:27,327:fsmgmt.fsmgmt:INFO     oss_failover completed, running lfsck
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;MDS reported a single error:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;May  9 20:08:39 soak-8 kernel: LustreError: 5550:0:(lfsck_lib.c:2680:lfsck_load_one_trace_file()) soaked-MDT0000-osd: unlink lfsck sub trace file lfsck_namespace_00: rc = 0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Before soak hits timeout, MDS has wedged:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;May  9 20:12:41 soak-8 kernel: NMI watchdog: BUG: soft lockup - CPU#6 stuck &lt;span class=&quot;code-keyword&quot;&gt;for&lt;/span&gt; 23s! [OI_scrub:5551]
May  9 20:12:41 soak-8 kernel: Modules linked in: osp(OE) mdd(OE) lod(OE) mdt(OE) lfsck(OE) mgs(OE) mgc(OE) osd_ldiskfs(OE) ldiskfs(OE) lquota(OE) fid(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) zfs(POE) zunicode(POE) zavl(POE) zcommon(POE) znvpair(POE) spl(OE) zlib_deflate 8021q garp mrp stp llc rpcrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp scsi_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm mlx4_ib ib_core intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd dm_round_robin ipmi_ssif sb_edac ipmi_devintf ntb sg iTCO_wdt ioatdma shpchp edac_core mei_me iTCO_vendor_support mei lpc_ich ipmi_si pcspkr i2c_i801
May  9 20:12:41 soak-8 kernel: ipmi_msghandler wmi nfsd dm_multipath dm_mod nfs_acl lockd grace auth_rpcgss sunrpc ip_tables ext4 mbcache jbd2 sd_mod crc_t10dif crct10dif_generic mlx4_en mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops isci igb ttm ahci crct10dif_pclmul crct10dif_common ptp libsas crc32c_intel libahci pps_core drm mlx4_core mpt2sas libata dca raid_class i2c_algo_bit scsi_transport_sas devlink i2c_core fjes
May  9 20:12:41 soak-8 kernel: CPU: 6 PID: 5551 Comm: OI_scrub Tainted: P           OE  ------------   3.10.0-514.16.1.el7_lustre.x86_64 #1
May  9 20:12:41 soak-8 kernel: Hardware name: Intel Corporation SandyBridge Platform/To be filled by O.E.M., BIOS SE5C600.86B.01.08.0003.022620131521 02/26/2013
May  9 20:12:41 soak-8 kernel: task: ffff88083fde6dd0 ti: ffff880703600000 task.ti: ffff880703600000
May  9 20:12:41 soak-8 kernel: RIP: 0010:[&amp;lt;ffffffffa121d1d9&amp;gt;]  [&amp;lt;ffffffffa121d1d9&amp;gt;] osd_inode_iteration+0x489/0xcc0 [osd_ldiskfs]
May  9 20:12:41 soak-8 kernel: RSP: 0018:ffff880703603d18  EFLAGS: 00000293
May  9 20:12:41 soak-8 kernel: RAX: 0000000000000004 RBX: 0000000023f30a01 RCX: 0000000000000000
May  9 20:12:41 soak-8 kernel: RDX: ffff880703603d78 RSI: ffff8800b2a36000 RDI: ffff8803162f6000
May  9 20:12:41 soak-8 kernel: RBP: ffff880703603df0 R08: ffff880703603d57 R09: 0000000000000004
May  9 20:12:41 soak-8 kernel: R10: 0000000023f30a01 R11: ffffea000c8fcc00 R12: 0000000023f30a01
May  9 20:12:41 soak-8 kernel: R13: ffff880703603d08 R14: 0000000023f30a01 R15: ffff880703603d08
May  9 20:12:41 soak-8 kernel: FS:  0000000000000000(0000) GS:ffff88042e180000(0000) knlGS:0000000000000000
May  9 20:12:41 soak-8 kernel: CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
May  9 20:12:41 soak-8 kernel: CR2: 00007f64d55202e0 CR3: 00000000019be000 CR4: 00000000000407e0
May  9 20:12:41 soak-8 kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
May  9 20:12:41 soak-8 kernel: DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
May  9 20:12:41 soak-8 kernel: Stack:
May  9 20:12:41 soak-8 kernel: ffffffffa121b990 ffffffffa1217a20 ffff8800b2a36000 00000000810d354f
May  9 20:12:41 soak-8 kernel: ffff8803162f6000 ffff8800b2a37468 0000000020000000 010000000000000c
May  9 20:12:41 soak-8 kernel: 0000000000000000 0000000000000000 ffff8800b2a36000 0000000000000000
May  9 20:12:41 soak-8 kernel: Call Trace:
May  9 20:12:41 soak-8 kernel: [&amp;lt;ffffffffa121b990&amp;gt;] ? osd_ios_ROOT_scan+0x300/0x300 [osd_ldiskfs]
May  9 20:12:41 soak-8 kernel: [&amp;lt;ffffffffa1217a20&amp;gt;] ? osd_preload_next+0xb0/0xb0 [osd_ldiskfs]
May  9 20:12:41 soak-8 kernel: [&amp;lt;ffffffffa121e370&amp;gt;] osd_scrub_main+0x960/0xf30 [osd_ldiskfs]
May  9 20:12:41 soak-8 kernel: [&amp;lt;ffffffff810c54c0&amp;gt;] ? wake_up_state+0x20/0x20
May  9 20:12:41 soak-8 kernel: [&amp;lt;ffffffffa121da10&amp;gt;] ? osd_inode_iteration+0xcc0/0xcc0 [osd_ldiskfs]
May  9 20:12:41 soak-8 kernel: [&amp;lt;ffffffff810b0a4f&amp;gt;] kthread+0xcf/0xe0
May  9 20:12:41 soak-8 kernel: [&amp;lt;ffffffff810b0980&amp;gt;] ? kthread_create_on_node+0x140/0x140
May  9 20:12:41 soak-8 kernel: [&amp;lt;ffffffff81697318&amp;gt;] ret_from_fork+0x58/0x90
May  9 20:12:41 soak-8 kernel: [&amp;lt;ffffffff810b0980&amp;gt;] ? kthread_create_on_node+0x140/0x140
May  9 20:12:41 soak-8 kernel: Code: 00 e8 7c eb 97 ff e9 0f fc ff ff 0f 1f 80 00 00 00 00 45 89 e9 4c 8d 85 67 ff ff ff 48 8b 4d a8 48 8d 55 88 48 8b b5 38 ff ff ff &amp;lt;48&amp;gt; 8b bd 48 ff ff ff 48 8b 85 28 ff ff ff ff d0 85 c0 41 89 c5
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;lfsck hit 600 second timeout, abort attempted:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;2017-05-09 20:18:46,982:fsmgmt.fsmgmt:ERROR    lfsck still running after 600s, aborting
2017-05-09 20:18:46,983:fsmgmt.fsmgmt:INFO     executing cmd: lctl lfsck_stop -M soaked-MDT0000
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Once the MDS hits the lockup, it hits it over and over, and within minutes it is doing nothing other than hitting the lockup. &lt;br/&gt;
At this point, I decided not to wait for the crash, dumped stacks, then forced a crash dump.&lt;br/&gt;
Crash dump is available on soak&lt;/p&gt;</description>
                <environment>Soak test cluster</environment>
        <key id="46015">LU-9488</key>
            <summary>soft lockup in osd_inode_iteration()</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="yong.fan">nasf</assignee>
                                    <reporter username="cliffw">Cliff White</reporter>
                        <labels>
                    </labels>
                <created>Wed, 10 May 2017 17:36:28 +0000</created>
                <updated>Wed, 6 Sep 2017 18:57:05 +0000</updated>
                            <resolved>Sat, 10 Jun 2017 12:42:38 +0000</resolved>
                                    <version>Lustre 2.10.0</version>
                                    <fixVersion>Lustre 2.10.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="195537" author="pjones" created="Thu, 11 May 2017 17:50:10 +0000"  >&lt;p&gt;Fan Yong&lt;/p&gt;

&lt;p&gt;Could you please assist with this one?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="196604" author="gerrit" created="Mon, 22 May 2017 14:26:38 +0000"  >&lt;p&gt;Fan Yong (fan.yong@intel.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/27228&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/27228&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9488&quot; title=&quot;soft lockup in osd_inode_iteration()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9488&quot;&gt;&lt;del&gt;LU-9488&lt;/del&gt;&lt;/a&gt; scrub: reset noslot for full speed scrub&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: e7b6dfcd1f83d35aaa82856e69863b4bc2695103&lt;/p&gt;</comment>
                            <comment id="196608" author="yong.fan" created="Mon, 22 May 2017 14:32:13 +0000"  >&lt;p&gt;Cliff,&lt;/p&gt;

&lt;p&gt;I hope that the patch &lt;a href=&quot;https://review.whamcloud.com/27228&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/27228&lt;/a&gt; can fix the OI scrub soft lockup trouble. But if it does not, please dump the oi_scrub via the proc interface. The patch adds more information to the /proc/fs/lustre/osd-ldiskfs/xxx/oi_scrub output, which is helpful for understanding the OI scrub status.&lt;/p&gt;

&lt;p&gt;Thanks!&lt;/p&gt;</comment>
                            <comment id="198806" author="gerrit" created="Sat, 10 Jun 2017 02:49:05 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/27228/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/27228/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9488&quot; title=&quot;soft lockup in osd_inode_iteration()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9488&quot;&gt;&lt;del&gt;LU-9488&lt;/del&gt;&lt;/a&gt; scrub: reset noslot for full speed scrub&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: e2ad44de38a4ef42b975e71c01bbfce0ef7dd827&lt;/p&gt;</comment>
                            <comment id="198824" author="pjones" created="Sat, 10 Jun 2017 12:42:38 +0000"  >&lt;p&gt;Landed for 2.10&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="45629">LU-9381</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="48188">LU-9952</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="26647" name="soak-8.console.log" size="2334734" author="cliffw" created="Wed, 10 May 2017 17:36:52 +0000"/>
                            <attachment id="26648" name="soak-8.vmcore-dmesg.txt" size="1033888" author="cliffw" created="Wed, 10 May 2017 17:37:03 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzzcfj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>