<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:25:36 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-2485] NULL pointer dereference in lustre_swab_lov_user_md_common</title>
                <link>https://jira.whamcloud.com/browse/LU-2485</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;We repeatedly hit this problem on our Grove-Production MDS today:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;BUG: unable to handle kernel NULL pointer dereference at 000000000000001c       
IP: [&amp;lt;ffffffffa08bcdb7&amp;gt;] lustre_swab_lov_user_md_common+0x27/0x4e0 [ptlrpc]
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;


&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash&amp;gt; bt                                                                       
PID: 738    TASK: ffff881778c9caa0  CPU: 14  COMMAND: &quot;mdt00_006&quot;               
 #0 [ffff88175b907370] machine_kexec at ffffffff8103216b                        
 #1 [ffff88175b9073d0] crash_kexec at ffffffff810b8d12                          
 #2 [ffff88175b9074a0] oops_end at ffffffff814f2c00                             
 #3 [ffff88175b9074d0] no_context at ffffffff810423fb                           
 #4 [ffff88175b907520] __bad_area_nosemaphore at ffffffff81042685               
 #5 [ffff88175b907570] bad_area_nosemaphore at ffffffff81042753                 
 #6 [ffff88175b907580] __do_page_fault at ffffffff81042e0d                      
 #7 [ffff88175b9076a0] do_page_fault at ffffffff814f4bde                        
 #8 [ffff88175b9076d0] page_fault at ffffffff814f1f95                           
    [exception RIP: lustre_swab_lov_user_md_common+39]                          
    RIP: ffffffffa08bcdb7  RSP: ffff88175b907780  RFLAGS: 00010246              
    RAX: 0000000000000001  RBX: 0000000000000000  RCX: 0000000000000000         
    RDX: ffffffffa090961a  RSI: 0000000000000000  RDI: 0000000000000000         
    RBP: ffff88175b907790   R8: ffff88175b937000   R9: ffff88175b8910d0         
    R10: 0000000000000001  R11: 00000000fffffff3  R12: ffff8817ec176000         
    R13: ffff88175c222468  R14: ffffc9013311e208  R15: ffff8817ec176000         
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018                              
 #9 [ffff88175b907798] lustre_swab_lov_user_md_v3 at ffffffffa08bd2ad [ptlrpc]  
#10 [ffff88175b9077b8] lod_qos_prep_create at ffffffffa0b6bf77 [lod]            
#11 [ffff88175b907858] lod_declare_striped_object at ffffffffa0b66c7b [lod]     
#12 [ffff88175b9078b8] lod_declare_xattr_set at ffffffffa0b67b9d [lod]          
#13 [ffff88175b907918] mdd_create_data at ffffffffa0bf4c00 [mdd]                
#14 [ffff88175b907978] mdt_finish_open at ffffffffa0c794f8 [mdt]                
#15 [ffff88175b907a08] mdt_open_by_fid_lock at ffffffffa0c7a5a7 [mdt]           
#16 [ffff88175b907a78] mdt_reint_open at ffffffffa0c7ac5f [mdt]                 
#17 [ffff88175b907b58] mdt_reint_rec at ffffffffa0c66a21 [mdt]                  
#18 [ffff88175b907b78] mdt_reint_internal at ffffffffa0c601b3 [mdt]             
#19 [ffff88175b907bb8] mdt_intent_reint at ffffffffa0c6077d [mdt]               
#20 [ffff88175b907c08] mdt_intent_policy at ffffffffa0c5c38e [mdt]              
#21 [ffff88175b907c48] ldlm_lock_enqueue at ffffffffa0872b91 [ptlrpc]           
#22 [ffff88175b907ca8] ldlm_handle_enqueue0 at ffffffffa089a837 [ptlrpc]        
#23 [ffff88175b907d18] mdt_enqueue at ffffffffa0c5bf16 [mdt]                    
#24 [ffff88175b907d38] mdt_handle_common at ffffffffa0c4fdd2 [mdt]              
#25 [ffff88175b907d88] mdt_regular_handle at ffffffffa0c50cd5 [mdt]             
#26 [ffff88175b907d98] ptlrpc_server_handle_request at ffffffffa08ca8fc [ptlrpc]
#27 [ffff88175b907e98] ptlrpc_main at ffffffffa08cbeec [ptlrpc]                 
#28 [ffff88175b907f48] kernel_thread at ffffffff8100c14a 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Recovery was manually aborted, which cleared up the issue:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;lctl --device 5 abort_recovery
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Prior to the manual intervention, the node would continuously crash after recovery for about 12 hours.&lt;/p&gt;</description>
                <environment></environment>
        <key id="16915">LU-2485</key>
            <summary>NULL pointer dereference in lustre_swab_lov_user_md_common</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.whamcloud.com/images/icons/priorities/blocker.svg">Blocker</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bzzz">Alex Zhuravlev</assignee>
                                    <reporter username="prakash">Prakash Surya</reporter>
                        <labels>
                            <label>LB</label>
                            <label>sequoia</label>
                    </labels>
                <created>Wed, 12 Dec 2012 12:58:38 +0000</created>
                <updated>Sat, 22 Dec 2012 00:40:39 +0000</updated>
                            <resolved>Sat, 22 Dec 2012 00:40:39 +0000</resolved>
                                    <version>Lustre 2.4.0</version>
                    <version>Lustre 2.1.4</version>
                                    <fixVersion>Lustre 2.4.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>6</watches>
                                                                            <comments>
                            <comment id="49141" author="morrone" created="Wed, 12 Dec 2012 13:23:40 +0000"  >&lt;p&gt;I probably caused this with the attached test1.c file.  It was an early pass at testing setting striping through xattrs.  The test program isn&apos;t correct in places, but I am attaching anyway since that is likely the one that triggered the MDS crash.&lt;/p&gt;

&lt;p&gt;Client was a 64k page ppc64 node, MDS is a normal x86_64 node.&lt;/p&gt;</comment>
                            <comment id="49142" author="bzzz" created="Wed, 12 Dec 2012 13:27:42 +0000"  >&lt;p&gt;please try with &lt;a href=&quot;http://review.whamcloud.com/4814&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/4814&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="49148" author="adilger" created="Wed, 12 Dec 2012 14:03:03 +0000"  >&lt;p&gt;lol, I was just going to ask whether this was caused by Chris&apos; testing, after having just read his previous comment.&lt;/p&gt;

&lt;p&gt;Fortunately&lt;img class=&quot;emoticon&quot; src=&quot;https://jira.whamcloud.com/images/icons/emoticons/help_16.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt; this only appears to be hit with big-endian clients, so getting a fix into 2.1.4 and 2.4.0 and at LLNL should cover most of the users.  I thought it would mean that we couldn&apos;t use the fsetxattr() code safely on 2.4 at all since it would crash 2.1.x MDSes.&lt;/p&gt;</comment>
                            <comment id="49169" author="prakash" created="Wed, 12 Dec 2012 19:38:44 +0000"  >&lt;p&gt;Alex, I&apos;ve pulled that in.&lt;/p&gt;</comment>
                            <comment id="49588" author="pjones" created="Sat, 22 Dec 2012 00:40:39 +0000"  >&lt;p&gt;Landed for 2.4&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="12095" name="test1.c" size="2615" author="morrone" created="Wed, 12 Dec 2012 13:23:40 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvdq7:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>5830</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>