<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:57:39 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-13018] NULL pointer dereference in osd_attr_get while consuming changelogs and reading changelog_size</title>
                <link>https://jira.whamcloud.com/browse/LU-13018</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;We monitor changelog_size on our MDTs, fetching it every 5 minutes under normal operation.&#160; We also consume changelogs constantly via Starfish.&#160; We occasionally see the following BUG:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Lustre: 29422:0:(mdd_device.c:1577:mdd_changelog_clear()) Skipped 22 previous similar messages
BUG: unable to handle kernel NULL pointer dereference at 000000000000001c
IP: [&amp;lt;ffffffffc14d20a2&amp;gt;] osd_attr_get+0x62/0x340 [osd_zfs]
PGD 0 
Oops: 0000 [#1] SMP 
CPU: 4 PID: 21384 Comm: snmpd Kdump: loaded Tainted: P &#160; &#160; &#160; &#160; &#160; OE&#160; ------------ T 3.10.0-1062.1.1.1chaos.ch6.x86_64 #1
Hardware name: Intel Corporation S2600WTTR/S2600WTTR, BIOS SE5C610.86B.01.01.0016.033120161139 03/31/2016
task: ffff9b1a2ab2b150 ti: ffff9b170ece0000 task.ti: ffff9b170ece0000
RIP: 0010:[&amp;lt;ffffffffc14d20a2&amp;gt;]&#160; [&amp;lt;ffffffffc14d20a2&amp;gt;] osd_attr_get+0x62/0x340 [osd_zfs]
Call Trace:
 [&amp;lt;ffffffffc12f73b5&amp;gt;] llog_size+0x35/0xe0 [obdclass]
 [&amp;lt;ffffffffc13006fe&amp;gt;] ? llog_cat_id2handle+0x30e/0x5b0 [obdclass]
 [&amp;lt;ffffffffb9232fb8&amp;gt;] ? kmem_cache_alloc+0x48/0x240
 [&amp;lt;ffffffffc1300b9c&amp;gt;] llog_cat_size_cb+0x1fc/0x3d0 [obdclass]
 [&amp;lt;ffffffffc12fb9db&amp;gt;] llog_process_thread+0x87b/0x1470 [obdclass]
 [&amp;lt;ffffffffb9232c89&amp;gt;] ? ___slab_alloc+0x209/0x4f0
 [&amp;lt;ffffffffc13009a0&amp;gt;] ? llog_cat_id2handle+0x5b0/0x5b0 [obdclass]
 [&amp;lt;ffffffffc12fc68c&amp;gt;] llog_process_or_fork+0xbc/0x450 [obdclass]
 [&amp;lt;ffffffffc13009a0&amp;gt;] ? llog_cat_id2handle+0x5b0/0x5b0 [obdclass]
 [&amp;lt;ffffffffc1300f09&amp;gt;] llog_cat_process_or_fork+0x199/0x2a0 [obdclass]
 [&amp;lt;ffffffffc1301098&amp;gt;] llog_cat_size+0x58/0x80 [obdclass]
 [&amp;lt;ffffffffc19d7a32&amp;gt;] mdd_changelog_size_ctxt+0x92/0x320 [mdd]
 [&amp;lt;ffffffffc19d7d35&amp;gt;] mdd_changelog_size_seq_show+0x75/0xe0 [mdd]
 [&amp;lt;ffffffffb9284d18&amp;gt;] seq_read+0x138/0x460
 [&amp;lt;ffffffffb92d94b0&amp;gt;] proc_reg_read+0x40/0x80
 [&amp;lt;ffffffffb925a71c&amp;gt;] vfs_read+0xbc/0x1c0
 [&amp;lt;ffffffffb925b68f&amp;gt;] SyS_read+0x7f/0xf0
 [&amp;lt;ffffffffb97ba11e&amp;gt;] system_call_fastpath+0x25/0x2a &lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</description>
                <environment>zfs-0.7.11-9llnl&lt;br/&gt;
Lustre 2.10.8_4.chaos&lt;br/&gt;
See &lt;a href=&quot;https://github.com/LLNL/lustre/commits/2.10.8_4.chaos&quot;&gt;https://github.com/LLNL/lustre/commits/2.10.8_4.chaos&lt;/a&gt;&lt;br/&gt;
RHEL 7.7 based OS</environment>
        <key id="57472">LU-13018</key>
            <summary>NULL pointer dereference in osd_attr_get while consuming changelogs and reading changelog_size</summary>
                <type id="3" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11318&amp;avatarType=issuetype">Task</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="tappro">Mikhail Pershin</assignee>
                                    <reporter username="ofaaland">Olaf Faaland</reporter>
                        <labels>
                            <label>llnl</label>
                    </labels>
                <created>Tue, 26 Nov 2019 18:25:59 +0000</created>
                <updated>Sat, 1 Feb 2020 16:13:21 +0000</updated>
                            <resolved>Sat, 1 Feb 2020 16:13:13 +0000</resolved>
                                    <version>Lustre 2.10.8</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="258854" author="ofaaland" created="Tue, 26 Nov 2019 18:27:24 +0000"  >&lt;p&gt;I haven&apos;t tried the obvious thing of reading changelog_size in a tight loop while clearing changelogs to see how easy it is to reproduce.&#160; I&apos;ll do that today.&lt;/p&gt;</comment>
                            <comment id="258855" author="pjones" created="Tue, 26 Nov 2019 18:47:23 +0000"  >&lt;p&gt;Mike&lt;/p&gt;

&lt;p&gt;Could you please advise?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="258859" author="ofaaland" created="Tue, 26 Nov 2019 19:24:10 +0000"  >&lt;p&gt;I wasn&apos;t able to quickly reproduce it as described above.&#160; I&apos;ll let you know if running for a longer duration works.&lt;/p&gt;</comment>
                            <comment id="258896" author="tappro" created="Wed, 27 Nov 2019 10:58:00 +0000"  >&lt;p&gt;Olaf, is that possible to find out what is happening at 0x62 there:&lt;br/&gt;
 &lt;tt&gt;RIP: 0010:&lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc14d20a2&amp;gt;&amp;#93;&lt;/span&gt; &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffc14d20a2&amp;gt;&amp;#93;&lt;/span&gt; osd_attr_get+0x62/0x340 &lt;span class=&quot;error&quot;&gt;&amp;#91;osd_zfs&amp;#93;&lt;/span&gt;&lt;/tt&gt;&lt;br/&gt;
 or maybe attach osd_object.o binary file or .ko module?&lt;/p&gt;</comment>
                            <comment id="259079" author="ofaaland" created="Tue, 3 Dec 2019 17:12:21 +0000"  >&lt;p&gt;Mike,&lt;/p&gt;

&lt;p&gt;Sorry, I meant to do that in the first place.&#160; Here you go:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;(gdb) l *(osd_attr_get+0x62)
0x90d2 is in osd_attr_get (/usr/src/debug/lustre-2.10.8_4.chaos/lustre/osd-zfs/osd_object.c:788).
783		uint32_t		 blksize;
784		int			 rc = 0;
785	
786		down_read(&amp;amp;obj-&amp;gt;oo_guard);
787	
788		if (unlikely(!dt_object_exists(dt) || obj-&amp;gt;oo_destroyed))
789			GOTO(out, rc = -ENOENT);
790	
791		if (unlikely(fid_is_acct(lu_object_fid(&amp;amp;dt-&amp;gt;do_lu))))
792			GOTO(out, rc = 0); 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Thanks&lt;/p&gt;</comment>
                            <comment id="259483" author="tappro" created="Mon, 9 Dec 2019 18:52:20 +0000"  >&lt;p&gt;As I understand this offset 0x1c is for &lt;tt&gt;loh_attr&lt;/tt&gt; field in lo_header while checking &lt;tt&gt;dt_object_exists()&lt;/tt&gt;. That means we have a race here between llog deletion and polling its size. I will check related code.&lt;/p&gt;

&lt;p&gt;Olaf, can this be reproduced easily or this is quite rare situation?&lt;/p&gt;</comment>
                            <comment id="259489" author="ofaaland" created="Mon, 9 Dec 2019 19:15:51 +0000"  >&lt;p&gt;Hi Mike,&lt;/p&gt;

&lt;p&gt;Thanks for looking.&#160; I haven&apos;t been able to reproduce it so far.&#160; It has not been frequent in production, but has happened multiple times.&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="259518" author="tappro" created="Tue, 10 Dec 2019 09:53:59 +0000"  >&lt;p&gt;Olaf, speaking of possible use-after-free for &lt;tt&gt;lu_object&lt;/tt&gt;, there is NULL pointer and I wonder why it is not poisoned, can you check kernel config options for SLAB_DEBUG or any other poisoning things? Are they turned off? I mean that if allocation debug is being used then this case is unlikely use-after-free&lt;/p&gt;</comment>
                            <comment id="259653" author="ofaaland" created="Wed, 11 Dec 2019 23:18:15 +0000"  >&lt;p&gt;Hi Mike,&lt;/p&gt;

&lt;p&gt;I know of the concept but don&apos;t know the different kernel config options to search for.&#160; Here&apos;s one - let me know what others to look for.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@zinc2:~]# grep SLUB_DEBUG /boot/config-3.10.0-1062.1.1.1chaos.ch6.x86_64
CONFIG_SLUB_DEBUG=y
# CONFIG_SLUB_DEBUG_ON is not set&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;I confirmed that all of /sys/kernel/slab/*/poison are &quot;0&quot;, which I believe means no poisoning is being done for that memory.&#160; If I&apos;m wrong, let me know.&lt;/p&gt;

&lt;p&gt;The kernel is a patched version of RHEL 7.7 kernel,&#160;3.10.0-1062.1.1.1chaos.ch6.x86_64.&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="259666" author="ofaaland" created="Thu, 12 Dec 2019 01:16:59 +0000"  >&lt;p&gt;For my reference: My local issue is TOSS4656&lt;/p&gt;</comment>
                            <comment id="262126" author="tappro" created="Wed, 29 Jan 2020 15:20:30 +0000"  >&lt;p&gt;this issue has the same reason as &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10198&quot; title=&quot;GPF llog_osd_declare_write_rec+0xb6/0x3d0 &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10198&quot;&gt;&lt;del&gt;LU-10198&lt;/del&gt;&lt;/a&gt;, I will prepare patch soon&lt;/p&gt;</comment>
                            <comment id="262163" author="tappro" created="Wed, 29 Jan 2020 22:09:00 +0000"  >&lt;p&gt;Patch was added under &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-10198&quot; title=&quot;GPF llog_osd_declare_write_rec+0xb6/0x3d0 &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-10198&quot;&gt;&lt;del&gt;LU-10198&lt;/del&gt;&lt;/a&gt;, here is the link: &lt;a href=&quot;https://review.whamcloud.com/#/c/37367/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/#/c/37367/&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="262167" author="ofaaland" created="Wed, 29 Jan 2020 23:02:59 +0000"  >&lt;p&gt;Thanks, Mikhail.&#160; You can close this issue as dupe.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="49108">LU-10198</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i00pzz:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                </customfields>
    </item>
</channel>
</rss>