<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:20:28 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-1878] NULL pointer in ll_readahead()</title>
                <link>https://jira.whamcloud.com/browse/LU-1878</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Kernel OOPS on a client&lt;/p&gt;

&lt;p&gt;&amp;lt;1&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;1740431.005226&amp;#93;&lt;/span&gt; BUG: unable to handle kernel NULL pointer dereference at (null)&lt;br/&gt;
&amp;lt;1&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;1740431.008694&amp;#93;&lt;/span&gt; IP: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0a9dfda&amp;gt;&amp;#93;&lt;/span&gt; ll_readahead+0x4a/0x1d20 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;lt;4&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;1740431.008694&amp;#93;&lt;/span&gt; PGD 2cffabf5067 PUD 2cffabf4067 PMD 0&lt;br/&gt;
&amp;lt;0&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;1740431.008694&amp;#93;&lt;/span&gt; Oops: 0000 &amp;#91;#1&amp;#93; SMP&lt;br/&gt;
&amp;lt;0&amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;1740431.008694&amp;#93;&lt;/span&gt; last sysfs file: /sys/devices/system/node/node63/meminfo&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;archkdbcommon&amp;#93;&lt;/span&gt;kdb&amp;gt;   -bt&lt;/p&gt;

&lt;p&gt;Stack traceback for pid 839153&lt;br/&gt;
0xffff8acffbd72600   000000   000000  0  000   R  0xffff8acffbd72c90 *magic.exe&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0a9dfda&amp;gt;&amp;#93;&lt;/span&gt; ll_readahead+0x4a/0x1d20 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0aa0ebb&amp;gt;&amp;#93;&lt;/span&gt; ll_readpage+0x120b/0x1760 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810b5cf1&amp;gt;&amp;#93;&lt;/span&gt; filemap_fault+0x241/0x3d0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0aaebd7&amp;gt;&amp;#93;&lt;/span&gt; ll_fault+0x3b7/0xdd0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810ceae7&amp;gt;&amp;#93;&lt;/span&gt; __do_fault+0x57/0x520&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810d3459&amp;gt;&amp;#93;&lt;/span&gt; handle_mm_fault+0x199/0x430&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8139dcaf&amp;gt;&amp;#93;&lt;/span&gt; do_page_fault+0x1bf/0x3e0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8139b5cf&amp;gt;&amp;#93;&lt;/span&gt; page_fault+0x1f/0x30&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;0000000000472d64&amp;gt;&amp;#93;&lt;/span&gt; 0x472d64&lt;/p&gt;</description>
                <environment></environment>
        <key id="15865">LU-1878</key>
            <summary>NULL pointer in ll_readahead()</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="2">Won&apos;t Fix</resolution>
                                        <assignee username="bogl">Bob Glossman</assignee>
                                    <reporter username="vs">Vladimir V. Saveliev</reporter>
                        <labels>
                    </labels>
                <created>Mon, 10 Sep 2012 17:00:56 +0000</created>
                <updated>Mon, 29 May 2017 05:23:12 +0000</updated>
                            <resolved>Mon, 29 May 2017 05:23:12 +0000</resolved>
                                    <version>Lustre 1.8.x (1.8.0 - 1.8.5)</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="44537" author="vs" created="Mon, 10 Sep 2012 17:18:11 +0000"  >&lt;p&gt;The BUG happened because ll_readahead() was called from ll_readpage() with&lt;br/&gt;
mapping == NULL, so the line&lt;/p&gt;

&lt;p&gt;                inode = mapping-&amp;gt;host&lt;br/&gt;
caused the oops.&lt;/p&gt;

&lt;p&gt;At the beginning of ll_readpage() page-&amp;gt;mapping was not NULL, because the line&lt;br/&gt;
        struct inode *inode = page-&amp;gt;mapping-&amp;gt;host;&lt;br/&gt;
worked fine. Therefore, page-&amp;gt;mapping changed to NULL while running ll_readpage().&lt;/p&gt;


&lt;p&gt;At first sight it looks like the page is kept locked for the whole duration of ll_readpage().&lt;br/&gt;
However, the following race seems to be possible:&lt;/p&gt;

&lt;p&gt;Process 1:&lt;br/&gt;
ll_readpage() called via handle_mm_fault()-&amp;gt;__do_fault()-&amp;gt;ll_fault()-&amp;gt;filemap_fault():&lt;br/&gt;
    starts with locked page P and calls ll_issue_page_read() which queues the page for i/o.&lt;br/&gt;
    Once the page is queued it can be included in group i/o triggered by another thread.&lt;/p&gt;

&lt;p&gt;Process 2:&lt;br/&gt;
obd_trigger_group_io()&lt;br/&gt;
    triggers i/o for the group of queued pages, including the page P. As a result, P gets unlocked and marked uptodate.&lt;/p&gt;

&lt;p&gt;Process 3:&lt;br/&gt;
shrink_slab()-&amp;gt;llap_shrink_cache()-&amp;gt;llap_shrink_cache_internal()&lt;br/&gt;
        kswapd shrinks slabs and calls ll_shrink_cache() (lustre&apos;s registered slab shrinker), which tries to shrink lustre pages.&lt;br/&gt;
        llap_shrink_cache_internal() locks the page P and calls ll_truncate_complete_page() which removes the page P from page cache (page-&amp;gt;mapping becomes NULL).&lt;br/&gt;
        llap_shrink_cache_internal() avoids calling ll_truncate_complete_page() on dirty pages, but it does not take care of busy pages: the ones with an increased reference count.&lt;/p&gt;

&lt;p&gt;Process 1 continues with page-&amp;gt;mapping == NULL and calls ll_readahead() passing NULL as 3rd argument:&lt;br/&gt;
ll_readpage()&lt;br/&gt;
...&lt;br/&gt;
        /* We have just requested the actual page we want, see if we can tack&lt;br/&gt;
         * on some readahead to that page&apos;s RPC before it is sent. */&lt;br/&gt;
        if (ll_i2sbi(inode)-&amp;gt;ll_ra_info.ra_max_pages_per_file)&lt;br/&gt;
                ll_readahead(&amp;amp;fd-&amp;gt;fd_ras, exp, page-&amp;gt;mapping, oig,&lt;br/&gt;
                             fd-&amp;gt;fd_flags);&lt;/p&gt;


&lt;p&gt;ll_readahead()&lt;br/&gt;
{&lt;br/&gt;
...&lt;br/&gt;
        inode = mapping-&amp;gt;host;            &amp;lt;&amp;lt;&amp;lt;== NULL pointer dereference happens here, as long as mapping is NULL&lt;br/&gt;
...&lt;/p&gt;


&lt;p&gt;The dump of stack traces provided by HLRN (&lt;a href=&quot;https://bugzilla.lustre.org/attachment.cgi?id=33148&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://bugzilla.lustre.org/attachment.cgi?id=33148&lt;/a&gt;) contains a number of processes:&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;handling pagefaults of lustre pages looking like:&lt;br/&gt;
0xffff8acffbd72600   839153   838417  1  343   R  0xffff8acffbd72c90 *magic.exe&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0a9dfda&amp;gt;&amp;#93;&lt;/span&gt; ll_readahead+0x4a/0x1d20 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0aa0ebb&amp;gt;&amp;#93;&lt;/span&gt; ll_readpage+0x120b/0x1760 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810b5cf1&amp;gt;&amp;#93;&lt;/span&gt; filemap_fault+0x241/0x3d0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0aaebd7&amp;gt;&amp;#93;&lt;/span&gt; ll_fault+0x3b7/0xdd0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810ceae7&amp;gt;&amp;#93;&lt;/span&gt; __do_fault+0x57/0x520&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810d3459&amp;gt;&amp;#93;&lt;/span&gt; handle_mm_fault+0x199/0x430&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8139dcaf&amp;gt;&amp;#93;&lt;/span&gt; do_page_fault+0x1bf/0x3e0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8139b5cf&amp;gt;&amp;#93;&lt;/span&gt; page_fault+0x1f/0x30&lt;/li&gt;
	&lt;li&gt;processes which have entered lustre shrink code while trying to allocate memory, and kswapd threads shrinking slabs, with a typical trace like:&lt;br/&gt;
0xffff881072408680     7239        2  0  965   D  0xffff881072408d10  kswapd56&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81399094&amp;gt;&amp;#93;&lt;/span&gt; thread_return+0x0/0x34c&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810cf589&amp;gt;&amp;#93;&lt;/span&gt; mapping_sleep+0x9/0x10&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8139986a&amp;gt;&amp;#93;&lt;/span&gt; __wait_on_bit_lock+0x4a/0xa0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81399939&amp;gt;&amp;#93;&lt;/span&gt; out_of_line_wait_on_bit_lock+0x79/0xa0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810d2309&amp;gt;&amp;#93;&lt;/span&gt; unmap_mapping_range+0x149/0x310&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0aae6c7&amp;gt;&amp;#93;&lt;/span&gt; ll_teardown_mmaps+0x67/0x1c0 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0aa2072&amp;gt;&amp;#93;&lt;/span&gt; llap_shrink_cache+0x5a2/0xa40 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0a83763&amp;gt;&amp;#93;&lt;/span&gt; ll_shrink_cache+0x33/0x70 &lt;span class=&quot;error&quot;&gt;&amp;#91;lustre&amp;#93;&lt;/span&gt;&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810c3f8c&amp;gt;&amp;#93;&lt;/span&gt; shrink_slab+0x12c/0x190&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810c49d8&amp;gt;&amp;#93;&lt;/span&gt; balance_pgdat+0x4c8/0x7b0&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff810c4de9&amp;gt;&amp;#93;&lt;/span&gt; kswapd+0x129/0x180&lt;br/&gt;
 &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff81064366&amp;gt;&amp;#93;&lt;/span&gt; kthread+0x96/0xa0&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;which shows that the race described above is possible.&lt;br/&gt;
The proposed fix is to avoid calling ll_truncate_complete_page() on pages that have extra references.&lt;/p&gt;
</comment>
                            <comment id="44538" author="vs" created="Mon, 10 Sep 2012 17:25:04 +0000"  >&lt;p&gt;llap_shrink_cache_internal(): avoid shrinking of pages which are in use&lt;/p&gt;</comment>
                            <comment id="44540" author="pjones" created="Mon, 10 Sep 2012 17:29:14 +0000"  >&lt;p&gt;Thanks for the patch Vladimir. Could you please upload it into gerrit so that we can review and test it? There are instructions on the whamcloud wiki or I can get an engineer to step you through the process if that helps.&lt;/p&gt;</comment>
                            <comment id="44541" author="vs" created="Mon, 10 Sep 2012 17:42:40 +0000"  >&lt;p&gt;Hello, Peter&lt;/p&gt;


&lt;p&gt;That is what I was planning to do next. From &lt;a href=&quot;http://wiki.whamcloud.com/display/PUB/Submitting+Changes&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://wiki.whamcloud.com/display/PUB/Submitting+Changes&lt;/a&gt; &lt;br/&gt;
  I realised that JIRA ticket needs to be created first. So, I created  &lt;br/&gt;
&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1878&quot; title=&quot;NULL pointer in ll_readahead()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1878&quot;&gt;&lt;del&gt;LU-1878&lt;/del&gt;&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;I hope that my patch matches&lt;br/&gt;
2. Test and commit your patch locally using acceptance-small.sh and&lt;br/&gt;
3. The patch follows the Requirements for patch submission:&lt;br/&gt;
and I was going to do 4. The patch has been uploaded to Gerrit. On  &lt;br/&gt;
that step I guess that I am to git push the patch somewhere.&lt;/p&gt;


&lt;p&gt;If I went wrong way or there is simple way, please get someone to help  &lt;br/&gt;
me.&lt;/p&gt;

&lt;p&gt;TIA&lt;br/&gt;
Best regards,&lt;br/&gt;
Vladimir&lt;/p&gt;

</comment>
                            <comment id="44543" author="pjones" created="Mon, 10 Sep 2012 18:13:44 +0000"  >&lt;p&gt;Great! It sounds like you are on the right track. Once the patch is in gerrit then you should post the URL to the changeset as a comment in the JIRA ticket. Thanks!&lt;/p&gt;</comment>
                            <comment id="44547" author="vs" created="Mon, 10 Sep 2012 19:54:21 +0000"  >&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/3927&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/3927&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="44548" author="vs" created="Mon, 10 Sep 2012 19:58:41 +0000"  >

&lt;p&gt;the patch is in gerrit (&lt;a href=&quot;http://review.whamcloud.com/3927&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/3927&lt;/a&gt;), however, I  &lt;br/&gt;
am not sure about inspectors, please advise.&lt;/p&gt;
</comment>
                            <comment id="44550" author="pjones" created="Mon, 10 Sep 2012 21:00:01 +0000"  >&lt;p&gt;Excellent - thanks Vladimir! Don&apos;t worry about the inspectors - we&apos;ll organize that.&lt;/p&gt;

&lt;p&gt;Bob&lt;/p&gt;

&lt;p&gt;Could you please take care of landing this patch and check in particular its applicability to the master branch&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="45129" author="vs" created="Tue, 18 Sep 2012 06:30:57 +0000"  >&lt;p&gt;&lt;a href=&quot;http://review.whamcloud.com/4026&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/4026&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;This is an updated version according to Jinshan Xiong&apos;s inspection.&lt;/p&gt;</comment>
                            <comment id="197429" author="adilger" created="Mon, 29 May 2017 05:23:12 +0000"  >&lt;p&gt;Close old ticket.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="11840" name="0001-LU-1878-llite-do-not-shrink-busy-pages.patch" size="1526" author="vs" created="Mon, 10 Sep 2012 17:25:04 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                    <customfield id="customfield_10020" key="com.atlassian.jira.plugin.system.customfieldtypes:float">
                        <customfieldname>Bugzilla ID</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>24376.0</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10040" key="com.atlassian.jira.plugin.system.customfieldtypes:labels">
                        <customfieldname>Epic</customfieldname>
                        <customfieldvalues>
                                        <label>client</label>
    
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvyfz:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9764</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>