<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:47:42 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary, append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-5003] sanity-hsm test_302 llog_write()) ASSERTION( loghandle-&gt;lgh_obj != ((void *)0) </title>
                <link>https://jira.whamcloud.com/browse/LU-5003</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for wangdi &amp;lt;di.wang@intel.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;http://maloo.whamcloud.com/test_sets/d021bd8e-d2a0-11e3-a102-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://maloo.whamcloud.com/test_sets/d021bd8e-d2a0-11e3-a102-52540035b04c&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The sub-test test_302 failed with the following error:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;test failed to respond and timed out&lt;/p&gt;

&lt;p&gt;23:43:18:LustreError: 11-0: lustre-MDT0000-osp-MDT0001: Communicating with 10.1.4.101@tcp, operation obd_ping failed with -107.&lt;br/&gt;
23:43:18:LustreError: Skipped 6 previous similar messages&lt;br/&gt;
23:43:18:Lustre: lustre-MDT0000-osp-MDT0001: Connection to lustre-MDT0000 (at 10.1.4.101@tcp) was lost; in progress operations using this service will wait for recovery to complete&lt;br/&gt;
23:43:18:Lustre: Skipped 5 previous similar messages&lt;br/&gt;
23:43:18:LustreError: 166-1: MGC10.1.4.101@tcp: Connection to MGS (at 10.1.4.101@tcp) was lost; in progress operations using this service will fail&lt;br/&gt;
23:43:18:LustreError: 31202:0:(llog.c:407:llog_process_thread()) llog found corrupted&lt;br/&gt;
23:43:18:LustreError: 31202:0:(llog.c:870:llog_write()) ASSERTION( loghandle-&amp;gt;lgh_obj != ((void *)0) ) failed: &lt;br/&gt;
23:43:18:LustreError: 31202:0:(llog.c:870:llog_write()) LBUG&lt;br/&gt;
23:43:18:Pid: 31202, comm: llog_process_th&lt;br/&gt;
23:43:18:&lt;br/&gt;
23:43:18:Call Trace:&lt;br/&gt;
23:43:18: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0483895&amp;gt;&amp;#93;&lt;/span&gt; libcfs_debug_dumpstack+0x55/0x80 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
23:43:18: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa0483e97&amp;gt;&amp;#93;&lt;/span&gt; lbug_with_loc+0x47/0xb0 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
23:43:18: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa059dd42&amp;gt;&amp;#93;&lt;/span&gt; llog_write+0x352/0x420 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
23:43:18: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa04945a1&amp;gt;&amp;#93;&lt;/span&gt; ? libcfs_debug_msg+0x41/0x50 &lt;span class=&quot;error&quot;&gt;&amp;#91;libcfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
23:43:18: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa059defc&amp;gt;&amp;#93;&lt;/span&gt; llog_cancel_rec+0xbc/0x6a0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
23:43:18: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa059f073&amp;gt;&amp;#93;&lt;/span&gt; llog_process_thread+0xb93/0xdc0 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
23:43:18: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa05ed9cf&amp;gt;&amp;#93;&lt;/span&gt; ? keys_fill+0x6f/0x190 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
23:43:18: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa059f2e5&amp;gt;&amp;#93;&lt;/span&gt; llog_process_thread_daemonize+0x45/0x70 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
23:43:18: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffffa059f2a0&amp;gt;&amp;#93;&lt;/span&gt; ? llog_process_thread_daemonize+0x0/0x70 &lt;span class=&quot;error&quot;&gt;&amp;#91;obdclass&amp;#93;&lt;/span&gt;&lt;br/&gt;
23:43:18: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109aee6&amp;gt;&amp;#93;&lt;/span&gt; kthread+0x96/0xa0&lt;br/&gt;
23:43:18: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c20a&amp;gt;&amp;#93;&lt;/span&gt; child_rip+0xa/0x20&lt;br/&gt;
23:43:18: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8109ae50&amp;gt;&amp;#93;&lt;/span&gt; ? kthread+0x0/0xa0&lt;br/&gt;
23:43:18: &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;lt;ffffffff8100c200&amp;gt;&amp;#93;&lt;/span&gt; ? child_rip+0x0/0x20&lt;br/&gt;
23:43:18:&lt;br/&gt;
23:43:18:Kernel panic - not syncing: LBUG&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Info required for matching: sanity-hsm 302&lt;/p&gt;</description>
                <environment></environment>
        <key id="24556">LU-5003</key>
            <summary>sanity-hsm test_302 llog_write()) ASSERTION( loghandle-&gt;lgh_obj != ((void *)0) </summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="jamesanunez">James Nunez</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                            <label>patch</label>
                    </labels>
                <created>Sat, 3 May 2014 17:03:18 +0000</created>
                <updated>Mon, 6 Oct 2014 11:57:32 +0000</updated>
                            <resolved>Mon, 6 Oct 2014 11:57:32 +0000</resolved>
                                                    <fixVersion>Lustre 2.7.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>11</watches>
                                                                            <comments>
                            <comment id="83154" author="tappro" created="Mon, 5 May 2014 06:46:34 +0000"  >&lt;p&gt;I saw the same issue while testing another patch, it happens in test_301. I tend to think that the reason might be commit &lt;a href=&quot;http://git.whamcloud.com/?p=fs/lustre-release.git;a=commit;h=6760f383f6cd0964e70250723b9d24bce13b5318&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://git.whamcloud.com/?p=fs/lustre-release.git;a=commit;h=6760f383f6cd0964e70250723b9d24bce13b5318&lt;/a&gt;&lt;br/&gt;
it changed the plain llog deletion recognition logic, so I suppose the llog was deleted during cancel and is being updated after that. This is just a quick idea and an area to check first.&lt;/p&gt;</comment>
                            <comment id="85019" author="yujian" created="Wed, 28 May 2014 11:16:08 +0000"  >&lt;p&gt;Lustre build: &lt;a href=&quot;http://build.whamcloud.com/job/lustre-master/2065/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://build.whamcloud.com/job/lustre-master/2065/&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;conf-sanity test 57a also hit the same failure:&lt;br/&gt;
&lt;a href=&quot;https://maloo.whamcloud.com/test_sets/6b7ba1ac-e5e1-11e3-87f3-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/6b7ba1ac-e5e1-11e3-87f3-52540035b04c&lt;/a&gt;&lt;/p&gt;
</comment>
                            <comment id="88792" author="yong.fan" created="Fri, 11 Jul 2014 02:40:48 +0000"  >&lt;p&gt;Another failure instance:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/8c42ce96-085b-11e4-8b2d-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/8c42ce96-085b-11e4-8b2d-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="91989" author="jhammond" created="Tue, 19 Aug 2014 22:15:45 +0000"  >&lt;p&gt;Another:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://testing.hpdd.intel.com/test_logs/ef9a8e1e-27e8-11e4-8e75-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_logs/ef9a8e1e-27e8-11e4-8e75-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="94213" author="bzzz" created="Wed, 17 Sep 2014 05:08:46 +0000"  >&lt;p&gt;&lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/6d49dfa6-3e26-11e4-a0a3-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://testing.hpdd.intel.com/test_sets/6d49dfa6-3e26-11e4-a0a3-5254006e85c2&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="94219" author="zam" created="Wed, 17 Sep 2014 06:59:17 +0000"  >&lt;p&gt;I have a crash that looks exactly the same in replay-vbr test_4k; the llog being processed is a config llog.&lt;br/&gt;
From a crash dump, the loghandle appears to be a client one:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;  lgh_hdr = 0xffff88005a8d0000,
  lgh_obj = 0x0 &amp;lt;per_cpu__irq_stack_union&amp;gt;,
  lgh_last_idx = 249,
  lgh_cur_idx = 0,
  lgh_cur_offset = 0,
  lgh_ctxt = 0xffff88005b433e40,
...
  lgh_name = 0x0 &amp;lt;per_cpu__irq_stack_union&amp;gt;, 
  private_data = 0x0 &amp;lt;per_cpu__irq_stack_union&amp;gt;, 
  lgh_logops = 0xffffffffa078f260 &amp;lt;llog_client_ops&amp;gt;, 
  lgh_refcount = {
    counter = 1
  }
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;lgh_obj = 0x0 is what caused llog_write() to LBUG and lgh_logops = 0xffffffffa078f260 &amp;lt;llog_client_ops&amp;gt; indicates that the llog_handle is a client one.&lt;/p&gt;</comment>
                            <comment id="94220" author="zam" created="Wed, 17 Sep 2014 07:12:48 +0000"  >&lt;p&gt;patch &lt;a href=&quot;http://review.whamcloud.com/#/c/11955/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/#/c/11955/&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="94378" author="zam" created="Thu, 18 Sep 2014 13:50:58 +0000"  >&lt;p&gt;It seems it is &lt;font color=&quot;red&quot;&gt;not&lt;/font&gt; right to compare the error code with -EIO; the llog block read functions return -EINVAL if the llog file format check fails:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;&lt;span class=&quot;code-keyword&quot;&gt;static&lt;/span&gt; &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; llog_osd_next_block(&lt;span class=&quot;code-keyword&quot;&gt;const&lt;/span&gt; struct lu_env *env,
			       struct llog_handle *loghandle, &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; *cur_idx,
			       &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; next_idx, __u64 *cur_offset, void *buf,
			       &lt;span class=&quot;code-object&quot;&gt;int&lt;/span&gt; len)
{
	struct llog_thread_info	*lgi = llog_info(env);
...
		&lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (rc &amp;lt; sizeof(*tail)) {
			CERROR(&lt;span class=&quot;code-quote&quot;&gt;&quot;%s: invalid llog block at log id &quot;&lt;/span&gt;DOSTID&lt;span class=&quot;code-quote&quot;&gt;&quot;/%u &quot;&lt;/span&gt;
			       &lt;span class=&quot;code-quote&quot;&gt;&quot;offset &quot;&lt;/span&gt;LPU64&lt;span class=&quot;code-quote&quot;&gt;&quot;\n&quot;&lt;/span&gt;,
			       o-&amp;gt;do_lu.lo_dev-&amp;gt;ld_obd-&amp;gt;obd_name,
			       POSTID(&amp;amp;loghandle-&amp;gt;lgh_id.lgl_oi),
			       loghandle-&amp;gt;lgh_id.lgl_ogen, *cur_offset);
			GOTO(out, rc = -EINVAL);
		}
...
		&lt;span class=&quot;code-comment&quot;&gt;/* &lt;span class=&quot;code-keyword&quot;&gt;this&lt;/span&gt; shouldn&apos;t happen */&lt;/span&gt;
		&lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (tail-&amp;gt;lrt_index == 0) {
			CERROR(&lt;span class=&quot;code-quote&quot;&gt;&quot;%s: invalid llog tail at log id &quot;&lt;/span&gt;DOSTID&lt;span class=&quot;code-quote&quot;&gt;&quot;/%u &quot;&lt;/span&gt;
			       &lt;span class=&quot;code-quote&quot;&gt;&quot;offset &quot;&lt;/span&gt;LPU64&lt;span class=&quot;code-quote&quot;&gt;&quot;\n&quot;&lt;/span&gt;,
			       o-&amp;gt;do_lu.lo_dev-&amp;gt;ld_obd-&amp;gt;obd_name,
			       POSTID(&amp;amp;loghandle-&amp;gt;lgh_id.lgl_oi),
			       loghandle-&amp;gt;lgh_id.lgl_ogen, *cur_offset);
			GOTO(out, rc = -EINVAL);
		}
...
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;EIO is likely a temporary error (network, disk access, ...); EINVAL points to llog corruption.&lt;br/&gt;
Would it be better to change the &lt;tt&gt;rc == -EIO&lt;/tt&gt; check to &lt;tt&gt;rc == -EINVAL &amp;amp;&amp;amp; ...&lt;/tt&gt;?&lt;/p&gt;</comment>
                            <comment id="94379" author="pjones" created="Thu, 18 Sep 2014 14:04:40 +0000"  >&lt;p&gt;Thanks Zam!&lt;/p&gt;

&lt;p&gt;James&lt;/p&gt;

&lt;p&gt;Could you please review this patch?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="95692" author="pjones" created="Mon, 6 Oct 2014 11:57:32 +0000"  >&lt;p&gt;Landed for 2.7&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                            <outwardlinks description="duplicates">
                                        <issuelink>
            <issuekey id="24600">LU-5019</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzwlrj:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>13852</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>