<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:38:43 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-3996]  LustreError: 8136:0:(llog_osd.c:241:llog_osd_read_header()) MGS-osd: error reading log header from [0xa:0xa:0x0]: rc = -14</title>
                <link>https://jira.whamcloud.com/browse/LU-3996</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Running parallel-scale on Hyperion, initialization for IOR test. Formatted with ZFS. &lt;br/&gt;
MDS crashes during test startup.&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;LustreError: 8136:0:(llog_osd.c:241:llog_osd_read_header()) MGS-osd: error reading log header from [0xa:0xa:0x0]: rc = -14
2013-09-23 11:32:12 LustreError: 8136:0:(mgs_llog.c:1386:record_start_log()) MGS: can&apos;t start log lustre-params: rc = -14
2013-09-23 11:32:12 BUG: unable to handle kernel NULL pointer dereference at 00000000000000b8

&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Console log attached.&lt;/p&gt;</description>
                <environment></environment>
        <key id="21091">LU-3996</key>
            <summary> LustreError: 8136:0:(llog_osd.c:241:llog_osd_read_header()) MGS-osd: error reading log header from [0xa:0xa:0x0]: rc = -14</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="1" iconUrl="https://jira.whamcloud.com/images/icons/priorities/blocker.svg">Blocker</priority>
                        <status id="6" iconUrl="https://jira.whamcloud.com/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="tappro">Mikhail Pershin</assignee>
                                    <reporter username="cliffw">Cliff White</reporter>
                        <labels>
                    </labels>
                <created>Mon, 23 Sep 2013 18:41:05 +0000</created>
                <updated>Wed, 16 Oct 2013 03:15:42 +0000</updated>
                            <resolved>Wed, 16 Oct 2013 03:15:42 +0000</resolved>
                                    <version>Lustre 2.5.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="67360" author="green" created="Tue, 24 Sep 2013 14:37:04 +0000"  >&lt;p&gt;There&apos;s not enough information to see why llog init fails with EFAULT, but the crash reason is obvious, we fail to test for the llog opening status and try to close not opened llog as the result.&lt;/p&gt;

&lt;p&gt;Patch for the crash is in &lt;a href=&quot;http://review.whamcloud.com/7742&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/7742&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="67555" author="jlevi" created="Wed, 25 Sep 2013 16:10:58 +0000"  >&lt;p&gt;Cliff to provide the additional debug logs.&lt;br/&gt;
Will reassign this ticket once that info has been provided.&lt;/p&gt;</comment>
                            <comment id="67744" author="cliffw" created="Thu, 26 Sep 2013 18:18:02 +0000"  >&lt;p&gt;Reproduced crash on 2.4.93 with panic_on_oops=0 Console log and lctl dk attached.&lt;/p&gt;</comment>
                            <comment id="67795" author="green" created="Fri, 27 Sep 2013 04:54:21 +0000"  >&lt;p&gt;Hm, the lctl dk output is too late after the crash, it starts at 11:11am and ends at 11:14am on the 26th, and the oops was at 11:10&lt;br/&gt;
lctl dk needs to be run right after the crash (or you need to increase debug log buffer to a bigger value so it does not wrap super fast)&lt;/p&gt;</comment>
                            <comment id="67864" author="cliffw" created="Fri, 27 Sep 2013 19:09:44 +0000"  >&lt;p&gt;dumplog.115529 contains the error. I have included the dumps from 30 seconds before and 30 seconds after.&lt;/p&gt;</comment>
                            <comment id="67874" author="green" created="Fri, 27 Sep 2013 19:59:31 +0000"  >&lt;p&gt;Thanks!&lt;/p&gt;

&lt;p&gt;Ok, so the issue is we are trying to do a 8k read and can read only smaller amount of bytes (not enough logging to see how many) from the referenced llog file.&lt;/p&gt;

&lt;p&gt;Right now it sounds like the underlying mgs filesystem llog is damaged, we should mount it directly and check what&apos;s up with the llog file for &lt;span class=&quot;error&quot;&gt;&amp;#91;0xa:0xa:0x0&amp;#93;&lt;/span&gt; llog, it&apos;s probably way too short?&lt;/p&gt;</comment>
                            <comment id="67940" author="cliffw" created="Mon, 30 Sep 2013 14:21:56 +0000"  >&lt;p&gt;Each time this has failed, it has been on a freshly formatted filesystem. I can replicate and look at the log if that is necessary&lt;/p&gt;</comment>
                            <comment id="67952" author="green" created="Mon, 30 Sep 2013 15:46:35 +0000"  >&lt;p&gt;There&apos;s a fair chance the llog is created in a bad way from format, if this is really the case, there&apos;s no log for this process.&lt;br/&gt;
So Taking a look at llog (making sure it is short after the failure) and then keeping track on it right after reformat even before we mount lustre to confirm would be useful, and then we also need to see exactly how malformed is it, just short size, what&apos;s inside and so on.&lt;/p&gt;</comment>
                            <comment id="68000" author="cliffw" created="Mon, 30 Sep 2013 22:20:27 +0000"  >&lt;p&gt;all logs from CONFIGS directory on MGS after crash, dumped to text with llog_reader. Same error as before: &lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;2013-09-30 14:34:04 LustreError: 8465:0:(llog_osd.c:241:llog_osd_read_header()) MGS-osd: error reading log header from [0xa:0xa:0x0]: rc = -14
2013-09-30 14:34:04 LustreError: 8465:0:(mgs_llog.c:1386:record_start_log()) MGS: can&apos;t start log lustre-params: rc = -14
2013-09-30 14:34:04 BUG: unable to handle kernel NULL pointer dereference at 00000000000000b8
2013-09-30 14:34:04 IP: [&amp;lt;ffffffffa07ffe99&amp;gt;] llog_handle_put+0x9/0x70 [obdclass]
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="68001" author="cliffw" created="Mon, 30 Sep 2013 22:25:27 +0000"  >&lt;p&gt;debug_mb=1024, log dumped every 30 seconds for duration of test.&lt;/p&gt;</comment>
                            <comment id="68120" author="cliffw" created="Wed, 2 Oct 2013 00:23:22 +0000"  >&lt;p&gt;Found an easy way to reproduce this:&lt;br/&gt;
With clients mounted. on MGS:&lt;/p&gt;
&lt;ol&gt;
	&lt;li&gt;hyperion-agb5 /root &amp;gt; lctl conf_param lustre.sys.jobid_var=procname_uid&lt;br/&gt;
crashes immediately&lt;br/&gt;
And, if I set &lt;br/&gt;
export JOBID_VAR=&quot;existing&quot;&lt;br/&gt;
in my config, the test runs. &lt;/li&gt;
&lt;/ol&gt;
</comment>
                            <comment id="68375" author="tappro" created="Fri, 4 Oct 2013 15:34:58 +0000"  >&lt;p&gt;Cliff, can you reproduce that issue with commit &lt;a href=&quot;http://git.whamcloud.com/?p=fs/lustre-release.git;a=commit;h=a217228ce3e1c93fdfeb1d1aa6ff48b3f82abf83&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://git.whamcloud.com/?p=fs/lustre-release.git;a=commit;h=a217228ce3e1c93fdfeb1d1aa6ff48b3f82abf83&lt;/a&gt; ?&lt;/p&gt;</comment>
                            <comment id="68416" author="cliffw" created="Fri, 4 Oct 2013 20:08:30 +0000"  >&lt;p&gt;No, I cannot - ran the one-line test and it does not crash. Will run IOR shortly&lt;/p&gt;</comment>
                            <comment id="68491" author="cliffw" created="Mon, 7 Oct 2013 14:17:08 +0000"  >&lt;p&gt;Ran IOR without any crashes. Latest build fixes&lt;/p&gt;</comment>
                            <comment id="69073" author="jlevi" created="Wed, 16 Oct 2013 03:15:42 +0000"  >&lt;p&gt;Removed fixversion as this is a duplicate.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="20743">LU-3871</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="13529" name="agb5.crash.23.sept.2013" size="5098" author="cliffw" created="Mon, 23 Sep 2013 18:44:06 +0000"/>
                            <attachment id="13552" name="agb5.crash2.26Sept2013.txt" size="5134" author="cliffw" created="Thu, 26 Sep 2013 18:18:02 +0000"/>
                            <attachment id="13565" name="config.log.tar.gz" size="1955" author="cliffw" created="Mon, 30 Sep 2013 22:20:27 +0000"/>
                            <attachment id="13566" name="config.log.tar.gz" size="1955" author="cliffw" created="Mon, 30 Sep 2013 22:20:27 +0000"/>
                            <attachment id="13556" name="console.txt" size="5397" author="cliffw" created="Fri, 27 Sep 2013 19:09:44 +0000"/>
                            <attachment id="13553" name="dk.log.agb5.26Sept.2013.txt.gz" size="4297027" author="cliffw" created="Thu, 26 Sep 2013 18:18:02 +0000"/>
                            <attachment id="13557" name="dumplog.115459.gz" size="758069" author="cliffw" created="Fri, 27 Sep 2013 19:09:44 +0000"/>
                            <attachment id="13558" name="dumplog.115529.gz" size="750371" author="cliffw" created="Fri, 27 Sep 2013 19:09:44 +0000"/>
                            <attachment id="13559" name="dumplog.115600.gz" size="637739" author="cliffw" created="Fri, 27 Sep 2013 19:09:44 +0000"/>
                            <attachment id="13567" name="full.dumplogs.09302013.tar.gz" size="253" author="cliffw" created="Mon, 30 Sep 2013 22:25:27 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzw3p3:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>10693</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>