<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:32:04 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-17033] Add RCU protect for export nid operation</title>
                <link>https://jira.whamcloud.com/browse/LU-17033</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;A few crashes relate to exp_nid_hash. It looks like it was operated on without RCU protection.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[  257.896656] BUG: unable to handle kernel NULL pointer dereference at 00000000000000e2
[  257.897791] IP: [&amp;lt;ffffffffc0cf1eb0&amp;gt;] ldebugfs_rhash_seq_show+0xa0/0x1e0 [obdclass]
[  257.898814] PGD 21c80e0067 PUD 21bab0c067 PMD 0
[  257.899472] Oops: 0000 [#1] SMP
[  257.914018] CPU: 9 PID: 13241 Comm: lctl Kdump: loaded Tainted: G           OE  ------------ T 3.10.0-1160.95.1.el7_lustre.ddn17.x86_64 #1
[  257.915601] Hardware name: DDN SFA400NVX2E, BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
[  257.916811] task: ffffa1678707d280 ti: ffffa168c6f54000 task.ti: ffffa168c6f54000
[  257.917773] RIP: 0010:[&amp;lt;ffffffffc0cf1eb0&amp;gt;]  [&amp;lt;ffffffffc0cf1eb0&amp;gt;] ldebugfs_rhash_seq_show+0xa0/0x1e0 [obdclass]
[  257.919093] RSP: 0018:ffffa168c6f57d78  EFLAGS: 00010246
[  257.944326] Call Trace:
[  257.945836]  [&amp;lt;ffffffff8c084e93&amp;gt;] ? seq_printf+0x53/0x80
[  257.947705]  [&amp;lt;ffffffffc0cf20b0&amp;gt;] lprocfs_hash_seq_show+0x60/0x90 [obdclass]
[  257.949770]  [&amp;lt;ffffffffc15ff862&amp;gt;] mgs_hash_seq_show+0x12/0x20 [mgs]
[  257.951731]  [&amp;lt;ffffffff8c0857f8&amp;gt;] seq_read+0x138/0x460
[  257.953549]  [&amp;lt;ffffffff8c0d7ad0&amp;gt;] proc_reg_read+0x40/0x80
[  257.955357]  [&amp;lt;ffffffff8c05bb2f&amp;gt;] vfs_read+0x9f/0x170
[  257.957088]  [&amp;lt;ffffffff8c05c9a5&amp;gt;] SyS_read+0x55/0xd0
[  257.958780]  [&amp;lt;ffffffff8c5c639a&amp;gt;] system_call_fastpath+0x25/0x2a

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;.....&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[ 8320.870019] BUG: unable to handle kernel NULL pointer dereference at 00000000000001ca
[ 8320.872531] IP: [&amp;lt;ffffffff98db7459&amp;gt;] rht_deferred_worker+0x209/0x430
[ 8320.874773] PGD 0
[ 8320.876458] Oops: 0000 [#1] SMP
[ 8320.904160] CPU: 13 PID: 3272 Comm: kworker/13:1 Kdump: loaded Tainted: G           OE  ------------ T 3.10.0-1160.88.1.el7_lustre.ddn17.x86_64 #1
[ 8320.907100] Hardware name: DDN SFA400NVX2E, BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
[ 8320.909544] Workqueue: events rht_deferred_worker
[ 8320.911387] task: ffff89c6dfdb3180 ti: ffff89e8c3994000 task.ti: ffff89e8c3994000
[ 8320.913572] RIP: 0010:[&amp;lt;ffffffff98db7459&amp;gt;]  [&amp;lt;ffffffff98db7459&amp;gt;] rht_deferred_worker+0x209/0x430
[ 8320.939508] Call Trace:
[ 8320.940810]  [&amp;lt;ffffffff98ac32ef&amp;gt;] process_one_work+0x17f/0x440
[ 8320.942542]  [&amp;lt;ffffffff98ac4436&amp;gt;] worker_thread+0x126/0x3c0
[ 8320.944188]  [&amp;lt;ffffffff98ac4310&amp;gt;] ? manage_workers.isra.26+0x2b0/0x2b0
[ 8320.946001]  [&amp;lt;ffffffff98acb621&amp;gt;] kthread+0xd1/0xe0
[ 8320.947555]  [&amp;lt;ffffffff98acb550&amp;gt;] ? insert_kthread_work+0x40/0x40
[ 8320.949308]  [&amp;lt;ffffffff991c61dd&amp;gt;] ret_from_fork_nospec_begin+0x7/0x21
[ 8320.951057]  [&amp;lt;ffffffff98acb550&amp;gt;] ? insert_kthread_work+0x40/0x40

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment></environment>
        <key id="77465">LU-17033</key>
            <summary>Add RCU protect for export nid operation</summary>
                <type id="9" iconUrl="https://jira.whamcloud.com/images/icons/issuetypes/undefined.png">Question/Request</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="6" iconUrl="https://jira.whamcloud.com/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="6">Not a Bug</resolution>
                                        <assignee username="ys">Yang Sheng</assignee>
                                    <reporter username="ys">Yang Sheng</reporter>
                        <labels>
                    </labels>
                <created>Wed, 16 Aug 2023 08:09:00 +0000</created>
                <updated>Sat, 23 Sep 2023 10:37:46 +0000</updated>
                            <resolved>Sat, 23 Sep 2023 10:37:46 +0000</resolved>
                                                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="382613" author="gerrit" created="Wed, 16 Aug 2023 08:34:46 +0000"  >&lt;p&gt;&quot;Yang Sheng &amp;lt;ys@whamcloud.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/51957&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/51957&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-17033&quot; title=&quot;Add RCU protect for export nid operation&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-17033&quot;&gt;&lt;del&gt;LU-17033&lt;/del&gt;&lt;/a&gt; obdclass: obd_nid_hash was corruption&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: f0e9b2a27324fe01340f83b9d53937e461fd02b1&lt;/p&gt;</comment>
                            <comment id="382641" author="ys" created="Wed, 16 Aug 2023 11:11:14 +0000"  >&lt;p&gt;Hi, Neil,&lt;/p&gt;

&lt;p&gt;As you asked, faddr2line result:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;
VMrhel7# LANG=C bash   ~/git/linux/scripts/faddr2line vmlinux rht_deferred_worker+0x209/0x430
rht_deferred_worker+0x209/0x430:
rhashtable_rehash_one at lib/rhashtable.c:275
(inlined by) rhashtable_rehash_chain at lib/rhashtable.c:315
(inlined by) rhashtable_rehash_table at lib/rhashtable.c:363
(inlined by) rht_deferred_worker at lib/rhashtable.c:464
.........
       rht_for_each(entry, old_tbl, old_hash) {
                err = 0;
                next = rht_dereference_bucket(entry-&amp;gt;next, old_tbl, old_hash);   &amp;lt;&amp;lt;&amp;lt;--------

                if (rht_is_a_nulls(next))
                        break;

                pprev = &amp;amp;entry-&amp;gt;next;
        }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;The main problem is shown below:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;for stack:
[  471.820067] BUG: unable to handle kernel NULL pointer dereference at 0000000000000142
[  471.822528] IP: [&amp;lt;ffffffffa07b7536&amp;gt;] rht_deferred_worker+0x226/0x430
[  471.851583] CPU: 23 PID: 316 Comm: kworker/23:2 Kdump: loaded Tainted: G           OE  ------------ T 3.10.0-1160.95.1.el7_lustre.ddn17.x86_64 #1
[  471.854631] Hardware name: DDN SFA400NVX2E, BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
[  471.857301] Workqueue: events rht_deferred_worker
[  471.859330] task: ffff9ed3a5770000 ti: ffff9ed3b6960000 task.ti: ffff9ed3b6960000
[  471.861664] RIP: 0010:[&amp;lt;ffffffffa07b7536&amp;gt;]  [&amp;lt;ffffffffa07b7536&amp;gt;] rht_deferred_worker+0x226/0x430
[  471.864180] RSP: 0018:ffff9ed3b6963da0  EFLAGS: 00010246
[  471.866235] RAX: ffff9ed3e63944b8 RBX: 0000000000000142 RCX: 0000000000000000
[  471.868508] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9ed3d8c46c8c
[  471.870756] RBP: ffff9ed3b6963e18 R08: ffff9ed5d67608b0 R09: 0000000000000598
[  471.872993] R10: 00000000a77101d6 R11: 00000000c7893a1b R12: 0000000000000139
[  471.875213] R13: ffff9ed494bbe000 R14: ffff9ed3e63944b8 R15: ffff9ed457ea2498
[  471.877431] FS:  0000000000000000(0000) GS:ffff9ed6315c0000(0000) knlGS:0000000000000000
[  471.879730] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  471.881757] CR2: 0000000000000142 CR3: 00000023001ea000 CR4: 0000000000760fe0
[  471.883901] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  471.886028] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  471.888141] PKRU: 00000000
[  471.889741] Call Trace:

The table is exp_nid_hash:

crash&amp;gt; bucket_table 0xffff9ed3f1c73000
struct bucket_table {
  size = 256,
  nest = 0,
  rehash = 163,
  hash_rnd = 2859063006,
  locks_mask = 127,
  locks = 0xffff9ed3d8c46c00,
  walkers = {
    next = 0xffff9ed3f1c73020,
    prev = 0xffff9ed3f1c73020
  },
  rcu = {
    next = 0x0,
    func = 0x0
  },
  future_tbl = 0xffff9ed494bbe000,
  buckets = 0xffff9ed3f1c73080
}

Then look into bucket:
.......
ffff9ed3f1c73580:  0000000000000141 0000000000000143   A.......C.......
ffff9ed3f1c73590:  0000000000000145 ffff9ed3e63944b8  &amp;lt;&amp;lt;&amp;lt;&amp;lt;------    E........D9.....
ffff9ed3f1c735a0:  0000000000000149 000000000000014b   I.......K..............
crash&amp;gt; rd ffff9ed3e63944b8
ffff9ed3e63944b8:  0000000000000142 &amp;lt;&amp;lt;&amp;lt;&amp;lt;----- it should be 000147, the marker for a null entry, but was set to 0000142.

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;A few other instances show the same pattern. So I suspect the exp_nid_hash is missing some locking or a barrier.&lt;/p&gt;


&lt;p&gt;Thanks,&lt;br/&gt;
YangSheng&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                                        </outwardlinks>
                                                        </issuelinktype>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="77477">LU-17034</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i03t47:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>