<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:51:40 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5458] oops in libcfs_kkuc_group_put</title>
                <link>https://jira.whamcloud.com/browse/LU-5458</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;We got the following oops during testing:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;&amp;lt;4&amp;gt;PGD 0 
&amp;lt;4&amp;gt;Oops: 0002 [#1] SMP 
&amp;lt;4&amp;gt;last sysfs file: /sys/devices/system/cpu/online
&amp;lt;4&amp;gt;CPU 1 
&amp;lt;4&amp;gt;Modules linked in: lmv(U) fld(U) mgc(U) lustre(U) lov(U) osc(U) mdc(U) fid(U) ksocklnd(U) ptlrpc(U) obdclass(U) lnet(U) lvfs(U) sha512_generic sha256_generic crc32c_intel libcfs(U) nfs lockd fscache auth_rpcgss nfs_acl sunrpc VSMqfs(P)(U) autofs4 ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables ipv6 ppdev vmware_balloon parport_pc parport vmxnet3 sg i2c_piix4 i2c_core shpchp ext4 jbd2 mbcache sd_mod crc_t10dif sr_mod cdrom vmw_pvscsi pata_acpi ata_generic ata_piix dm_mirror dm_region_hash dm_log dm_mod [last unloaded: scsi_wait_scan]
&amp;lt;4&amp;gt;
&amp;lt;4&amp;gt;Pid: 24385, comm: ldlm_cb00_056 Tainted: P           ---------------    2.6.32-431.17.1.el6.x86_64 #1 VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform
&amp;lt;4&amp;gt;RIP: 0010:[&amp;lt;ffffffff8118a509&amp;gt;]  [&amp;lt;ffffffff8118a509&amp;gt;] fput+0x9/0x30
&amp;lt;4&amp;gt;RSP: 0018:ffff88012db55c20  EFLAGS: 00010246
&amp;lt;4&amp;gt;RAX: 00000000ffffffe0 RBX: ffff8800a8ea4fc0 RCX: 0000000000000000
&amp;lt;4&amp;gt;RDX: ffffffffa03c9eb0 RSI: 0000000000000000 RDI: 0000000000000000
&amp;lt;4&amp;gt;RBP: ffff88012db55c20 R08: 00000000ffffff0a R09: 00000000fffffffc
&amp;lt;4&amp;gt;R10: 0000000000000001 R11: 282064656c696166 R12: ffffffffa03c9c60
&amp;lt;4&amp;gt;R13: ffff88005df240f8 R14: 0000000000000000 R15: ffff88013b4ca000
&amp;lt;4&amp;gt;FS:  0000000000000000(0000) GS:ffff880028280000(0000) knlGS:0000000000000000
&amp;lt;4&amp;gt;CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
&amp;lt;4&amp;gt;CR2: 0000000000000030 CR3: 0000000001a85000 CR4: 00000000000407e0
&amp;lt;4&amp;gt;DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
&amp;lt;4&amp;gt;DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
&amp;lt;4&amp;gt;Process ldlm_cb00_056 (pid: 24385, threadinfo ffff88012db54000, task ffff88012da5f500)
&amp;lt;4&amp;gt;Stack:
&amp;lt;4&amp;gt; ffff88012db55c60 ffffffffa0388044 0000000000000002 00000000ffffffe0
&amp;lt;4&amp;gt;&amp;lt;d&amp;gt; ffff88005df240f8 ffff88005df24100 ffffffffa07103be ffff8801398d7000
&amp;lt;4&amp;gt;&amp;lt;d&amp;gt; ffff88012db55cc0 ffffffffa08649f7 ffff88008c9af3f0 000000008116f303
&amp;lt;4&amp;gt;Call Trace:
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0388044&amp;gt;] libcfs_kkuc_group_put+0x94/0x180 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa08649f7&amp;gt;] mdc_set_info_async+0x147/0x780 [mdc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa0699fad&amp;gt;] ldlm_callback_handler+0x4dd/0x1800 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa04c321f&amp;gt;] ? keys_fill+0x6f/0x190 [obdclass]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa06b8f6c&amp;gt;] ? lustre_msg_get_transno+0x8c/0x100 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa06bf61b&amp;gt;] ? ptlrpc_update_export_timer+0x4b/0x560 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa06c7a35&amp;gt;] ptlrpc_server_handle_request+0x385/0xc00 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa036f4ce&amp;gt;] ? cfs_timer_arm+0xe/0x10 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa03804ff&amp;gt;] ? lc_watchdog_touch+0x6f/0x170 [libcfs]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa06bf119&amp;gt;] ? ptlrpc_wait_event+0xa9/0x2d0 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff810546b9&amp;gt;] ? __wake_up_common+0x59/0x90
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa06c8d9d&amp;gt;] ptlrpc_main+0xaed/0x1740 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffffa06c82b0&amp;gt;] ? ptlrpc_main+0x0/0x1740 [ptlrpc]
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8109ab56&amp;gt;] kthread+0x96/0xa0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c20a&amp;gt;] child_rip+0xa/0x20
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8109aac0&amp;gt;] ? kthread+0x0/0xa0
&amp;lt;4&amp;gt; [&amp;lt;ffffffff8100c200&amp;gt;] ? child_rip+0x0/0x20
&amp;lt;4&amp;gt;Code: fe ff ff 31 d2 48 89 de 83 cf ff ff d0 e9 da fe ff ff 48 89 df e8 f8 63 04 00 e9 bb fe ff ff 0f 1f 00 55 48 89 e5 0f 1f 44 00 00 &amp;lt;f0&amp;gt; 48 ff 4f 30 0f 94 c0 84 c0 75 0b c9 c3 66 0f 1f 84 00 00 00 
&amp;lt;1&amp;gt;RIP  [&amp;lt;ffffffff8118a509&amp;gt;] fput+0x9/0x30
&amp;lt;4&amp;gt; RSP &amp;lt;ffff88012db55c20&amp;gt;
&amp;lt;4&amp;gt;CR2: 0000000000000030
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;crash bt:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;PID: 24385  TASK: ffff88012da5f500  CPU: 1   COMMAND: &quot;ldlm_cb00_056&quot;
 #0 [ffff88012db55810] machine_kexec at ffffffff81038f3b
 #1 [ffff88012db55870] crash_kexec at ffffffff810c59f2
 #2 [ffff88012db55940] oops_end at ffffffff8152b7f0
 #3 [ffff88012db55970] no_context at ffffffff8104a00b
 #4 [ffff88012db559c0] __bad_area_nosemaphore at ffffffff8104a295
 #5 [ffff88012db55a10] bad_area_nosemaphore at ffffffff8104a363
 #6 [ffff88012db55a20] __do_page_fault at ffffffff8104aabf
 #7 [ffff88012db55b40] do_page_fault at ffffffff8152d73e
 #8 [ffff88012db55b70] page_fault at ffffffff8152aaf5
    [exception RIP: fput+9]
    RIP: ffffffff8118a509  RSP: ffff88012db55c20  RFLAGS: 00010246
    RAX: 00000000ffffffe0  RBX: ffff8800a8ea4fc0  RCX: 0000000000000000
    RDX: ffffffffa03c9eb0  RSI: 0000000000000000  RDI: 0000000000000000
    RBP: ffff88012db55c20   R8: 00000000ffffff0a   R9: 00000000fffffffc
    R10: 0000000000000001  R11: 282064656c696166  R12: ffffffffa03c9c60
    R13: ffff88005df240f8  R14: 0000000000000000  R15: ffff88013b4ca000
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
 #9 [ffff88012db55c28] libcfs_kkuc_group_put at ffffffffa0388044 [libcfs]
#10 [ffff88012db55c68] mdc_set_info_async at ffffffffa08649f7 [mdc]
#11 [ffff88012db55cc8] ldlm_callback_handler at ffffffffa0699fad [ptlrpc]
#12 [ffff88012db55d68] ptlrpc_server_handle_request at ffffffffa06c7a35 [ptlrpc]
#13 [ffff88012db55e48] ptlrpc_main at ffffffffa06c8d9d [ptlrpc]
#14 [ffff88012db55ee8] kthread at ffffffff8109ab56
#15 [ffff88012db55f48] kernel_thread at ffffffff8100c20a
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The offending line in libcfs_kkuc_group_put() is&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;  fput(reg-&amp;gt;kr_fp);
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;reg is coming from kkuc_groups, which is an array of lists.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash64&amp;gt; rd kkuc_groups 8
ffffffffa03c9c40:  0000000000000000 0000000000000000   ................
ffffffffa03c9c50:  0000000000000000 0000000000000000   ................
ffffffffa03c9c60:  ffff8800a8ea4fc0 ffff8800a8ea4fc0   .O.......O......
ffffffffa03c9c70:  0000000000000000 0000000000000000   ................
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;So only one element is on the lists.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;list ffffffffa03c9c60 -s kkuc_reg
...
ffff8800a8ea4fc0
struct kkuc_reg {
  kr_chain = {
    next = 0xffffffffa03c9c60 &amp;lt;kkuc_groups+32&amp;gt;,
    prev = 0xffffffffa03c9c60 &amp;lt;kkuc_groups+32&amp;gt;
  },
  kr_uid = 23389,
  kr_fp = 0x0,
  kr_data = 0xffff8800a8ea4f80
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;So apparently reg-&amp;gt;kr_fp is NULL. I&apos;m not sure about reg, but since&lt;br/&gt;
it&apos;s the only one in the list, and RBX=ffff8800a8ea4fc0, that must be it.&lt;/p&gt;

&lt;p&gt;Looking at libcfs_kkuc_group_put(), it appears that it is not locking&lt;br/&gt;
things properly:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;        down_read(&amp;amp;kg_sem);
        cfs_list_for_each_entry(reg, &amp;amp;kkuc_groups[group], kr_chain) {
...
                                fput(reg-&amp;gt;kr_fp);
                                reg-&amp;gt;kr_fp = NULL;
...
        up_read(&amp;amp;kg_sem);
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Since reg is modified inside the loop, the lock should be taken with down_write/up_write&lt;br/&gt;
instead. I suspect there was a race in which two callers executed that&lt;br/&gt;
function concurrently: one won and the second crashed.&lt;/p&gt;</description>
                <environment>Centos 6.5</environment>
        <key id="25896">LU-5458</key>
            <summary>oops in libcfs_kkuc_group_put</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="6" iconUrl="https://jira.whamcloud.com/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="cliffw">Cliff White</assignee>
                                    <reporter username="fzago">Frank Zago</reporter>
                        <labels>
                            <label>patch</label>
                    </labels>
                <created>Wed, 6 Aug 2014 20:02:58 +0000</created>
                <updated>Wed, 8 Oct 2014 15:46:13 +0000</updated>
                            <resolved>Mon, 6 Oct 2014 12:00:16 +0000</resolved>
                                    <version>Lustre 2.6.0</version>
                    <version>Lustre 2.5.1</version>
                                    <fixVersion>Lustre 2.7.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="91005" author="fzago" created="Wed, 6 Aug 2014 20:11:00 +0000"  >&lt;p&gt;This might have some performance impact, but the existing code is not correct.&lt;/p&gt;

&lt;p&gt;  &lt;a href=&quot;http://review.whamcloud.com/11355&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/11355&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="91201" author="cliffw" created="Fri, 8 Aug 2014 17:48:57 +0000"  >&lt;p&gt;Thanks, will assign some more reviewers&lt;/p&gt;</comment>
                            <comment id="95694" author="pjones" created="Mon, 6 Oct 2014 12:00:16 +0000"  >&lt;p&gt;Landed for 2.7&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwt3r:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>15202</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>