<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:32:22 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-17069] Crash when writing to a deactivated OSC</title>
                <link>https://jira.whamcloud.com/browse/LU-17069</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This does not impact Lustre versions 2.15 and later.&lt;/p&gt;

&lt;p&gt;Call Trace:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[12029.369503] BUG: unable to handle kernel NULL pointer dereference at           (null)  
[12029.369605] IP: [&amp;lt;ffffffffc1041a67&amp;gt;] osc_lru_reserve+0x27/0x170 [osc]                  
[12029.369702] PGD 800000006895f067 PUD 4a719067 PMD 0                                    
[12029.369767] Oops: 0000 [#1] SMP                                                        
...
[12029.389698] RIP: 0010:[&amp;lt;ffffffffc1041a67&amp;gt;]  [&amp;lt;ffffffffc1041a67&amp;gt;] osc_lru_reserve+0x27/0x170 [osc]         
[12029.391039] RSP: 0018:ffff9756a8b83a70  EFLAGS: 00010206                                                  
[12029.392217] RAX: ffff97567fb4b000 RBX: ffff9756ba9cab88 RCX: 0000000000000000                             
[12029.393346] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9756818946c0                             
[12029.394480] RBP: ffff9756a8b83a88 R08: 0000000000000001 R09: 0000000000000000                             
[12029.395523] R10: ffff9756a4b6cf28 R11: 000000000000000f R12: ffff9756818946c0                             
[12029.396445] R13: 0000000000020000 R14: ffff9756ba9cab88 R15: ffff975682b0ba00                             
[12029.397361] FS:  00007efdb61e4740(0000) GS:ffff9756bfd00000(0000) knlGS:0000000000000000                  
[12029.398336] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033                                             
[12029.399246] CR2: 0000000000000000 CR3: 000000004cf86000 CR4: 00000000000606e0                             
[12029.400145] Call Trace:                                                                                   
[12029.400914]  [&amp;lt;ffffffffc1047ac6&amp;gt;] osc_io_write_iter_init+0x86/0x1e0 [osc]                                 
[12029.401708]  [&amp;lt;ffffffffc0afb1bf&amp;gt;] cl_io_iter_init+0x5f/0x120 [obdclass]                                   
[12029.402488]  [&amp;lt;ffffffffc10d4ff3&amp;gt;] lov_io_add_sub.isra.34+0xc3/0x380 [lov]                                 
[12029.403245]  [&amp;lt;ffffffffc10d9967&amp;gt;] lov_io_iter_init+0x257/0x720 [lov]                                      
[12029.403974]  [&amp;lt;ffffffffc10da38a&amp;gt;] lov_io_rw_iter_init+0x38a/0x520 [lov]                                   
[12029.404717]  [&amp;lt;ffffffffc0afb1bf&amp;gt;] cl_io_iter_init+0x5f/0x120 [obdclass]                                   
[12029.405401]  [&amp;lt;ffffffffc0afd4d2&amp;gt;] cl_io_loop+0x42/0x1c0 [obdclass]                                        
[12029.406016]  [&amp;lt;ffffffffc169a3bb&amp;gt;] ll_file_io_generic+0x63b/0xc90 [lustre]                                 
[12029.406637]  [&amp;lt;ffffffffc169aea9&amp;gt;] ll_file_aio_write+0x289/0x660 [lustre]                                  
[12029.407251]  [&amp;lt;ffffffffc169b380&amp;gt;] ll_file_write+0x100/0x1c0 [lustre]                                      
[12029.407850]  [&amp;lt;ffffffff9d44e590&amp;gt;] vfs_write+0xc0/0x1f0                                                    
[12029.408448]  [&amp;lt;ffffffff9d9aaed5&amp;gt;] ? system_call_after_swapgs+0xa2/0x13a                                   
[12029.409027]  [&amp;lt;ffffffff9d44f36f&amp;gt;] SyS_write+0x7f/0xf0                                                     
[12029.409603]  [&amp;lt;ffffffff9d9aaed5&amp;gt;] ? system_call_after_swapgs+0xa2/0x13a                                   
[12029.410153]  [&amp;lt;ffffffff9d9aaf92&amp;gt;] system_call_fastpath+0x25/0x2a                                          
[12029.410649]  [&amp;lt;ffffffff9d9aaed5&amp;gt;] ? system_call_after_swapgs+0xa2/0x13a                                   
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Code:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-c&quot;&gt;
&lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;unsigned&lt;/span&gt;&lt;/span&gt; &lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;long&lt;/span&gt;&lt;/span&gt; osc_lru_reserve(&lt;span class=&quot;code-keyword&quot;&gt;struct&lt;/span&gt; client_obd *cli, &lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;unsigned&lt;/span&gt;&lt;/span&gt; &lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;long&lt;/span&gt;&lt;/span&gt; npages)             
{                                                                                       
        &lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;unsigned&lt;/span&gt;&lt;/span&gt; &lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;long&lt;/span&gt;&lt;/span&gt; reserved = 0;                                                     
        &lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;unsigned&lt;/span&gt;&lt;/span&gt; &lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;long&lt;/span&gt;&lt;/span&gt; max_pages;                                                        
        &lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;unsigned&lt;/span&gt;&lt;/span&gt; &lt;span class=&quot;code-keyword&quot;&gt;&lt;span class=&quot;code-object&quot;&gt;long&lt;/span&gt;&lt;/span&gt; c;                                                                
                                                                                        
        /* reserve a full RPC window at most to avoid that a thread accidentally        
         * consumes too many LRU slots */                                               
        max_pages = cli-&amp;gt;cl_max_pages_per_rpc * cli-&amp;gt;cl_max_rpcs_in_flight;             
        if (npages &amp;gt; max_pages)                                                         
                npages = max_pages;                                                     
                                                                                        
        c = atomic_long_read(cli-&amp;gt;cl_lru_left);                           &amp;lt;-------- Crash              
....
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Reproducer:&lt;/p&gt;
&lt;ol&gt;
	&lt;li&gt;client&amp;gt; lfs setstripe -c1 -i0 test&lt;/li&gt;
	&lt;li&gt;mgs&amp;gt; lctl conf_param lustrefs-OST0000.osc.active=0&lt;/li&gt;
	&lt;li&gt;client&amp;gt; umount /mnt/lustre &amp;amp;&amp;amp; mount /mnt/lustre&lt;/li&gt;
	&lt;li&gt;client&amp;gt; dd if=/dev/zero of=yep bs=1M count=1 conv=notrunc&lt;br/&gt;
==&amp;gt; Crash&lt;/li&gt;
&lt;/ol&gt;


&lt;p&gt;The issue occurs only after remounting the client: in that case, the client does not try to connect to the OST and does not initialize the LRU cache:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;lctl dl
...
4 UP osc lustrefs-OST0000-osc-ffff8cb84915e000
...

crash&amp;gt; p obd_devs[4]-&amp;gt;u.cli.cl_lru_left      
$3 = (atomic_long_t *) 0x0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Most jobs do not cause a crash because they usually perform some other operation before a write (e.g. stats, seek, etc.). Those operations are not impacted by this bug and will return an EIO error (in osc_io_iter_init()).&lt;/p&gt;

&lt;p&gt;This is the same crash as &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-11658&quot; title=&quot;The cl_cache may be uninitialized while osc is activating&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-11658&quot;&gt;&lt;del&gt;LU-11658&lt;/del&gt;&lt;/a&gt; (but the patches did not fully fix the issue).&lt;/p&gt;</description>
                <environment></environment>
        <key id="77672">LU-17069</key>
            <summary>Crash when writing to a deactivated OSC</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="2">Won&apos;t Fix</resolution>
                                        <assignee username="eaujames">Etienne Aujames</assignee>
                                    <reporter username="eaujames">Etienne Aujames</reporter>
                        <labels>
                    </labels>
                <created>Thu, 31 Aug 2023 09:58:32 +0000</created>
                <updated>Thu, 31 Aug 2023 13:20:41 +0000</updated>
                            <resolved>Thu, 31 Aug 2023 12:42:17 +0000</resolved>
                                    <version>Lustre 2.12.9</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                            <comments>
                            <comment id="384367" author="eaujames" created="Thu, 31 Aug 2023 10:12:10 +0000"  >&lt;p&gt;On 2.15 Lustre, the issue seems to have been resolved by  &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12142&quot; title=&quot;Hang in OSC on eviction - threads stuck in read() and ldlm_bl_NN&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12142&quot;&gt;&lt;del&gt;LU-12142&lt;/del&gt;&lt;/a&gt; clio: fix hang on urgent cached pages (&lt;a href=&quot;https://review.whamcloud.com/40237&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/40237&lt;/a&gt;). osc_lru_reserve() is called after the osc_io_iter_init() in a separate callback.&lt;/p&gt;

&lt;p&gt;But the backport is too complicated on 2.12: there are too many changes in clio. I will push a special patch for 2.12.&lt;/p&gt;</comment>
                            <comment id="384370" author="pjones" created="Thu, 31 Aug 2023 12:42:17 +0000"  >&lt;p&gt;Nice detective work, Etienne, but the community 2.12.x branch is no longer active, so I will close out this ticket.&lt;/p&gt;</comment>
                            <comment id="384379" author="gerrit" created="Thu, 31 Aug 2023 13:10:13 +0000"  >&lt;p&gt;&quot;Etienne AUJAMES &amp;lt;eaujames@ddn.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/52197&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/52197&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-17069&quot; title=&quot;Crash when writing to a deactivated OSC&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-17069&quot;&gt;&lt;del&gt;LU-17069&lt;/del&gt;&lt;/a&gt; osc: check for invalid import before osc_lru_reserve&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_12&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: a5193dc97a38c4d41dc9b95eb8c8bc11425336c1&lt;/p&gt;</comment>
                            <comment id="384384" author="eaujames" created="Thu, 31 Aug 2023 13:20:41 +0000"  >&lt;p&gt;Thanks Peter,&lt;/p&gt;

&lt;p&gt;CEA is still using 2.12 (they are testing their first 2.15 FS now), so I have pushed the patch for 2.12 to resolve the current production issue.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="54001">LU-11658</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i03ubj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>