<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:22:17 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-2090] mdt_req_handle() ASSERTION(h-&gt;mh_act != NULL) failed</title>
                <link>https://jira.whamcloud.com/browse/LU-2090</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Production MDS got stuck in a crash/reboot loop.  It hit the summary assertion this morning then again on each reboot during recovery.  Finally we aborted recovery and the MDS stabilized.  We have several crash dumps.&lt;/p&gt;

&lt;p&gt;Backtrace below and console log attached.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;LustreError: 3411:0:(mdt_handler.c:2511:mdt_req_handle()) ASSERTION(h-&amp;gt;mh_act != NULL) failed
LustreError: 3411:0:(mdt_handler.c:2511:mdt_req_handle()) LBUG

PID: 7364   TASK: ffff88079b637540  CPU: 14  COMMAND: &quot;mdt_221&quot;
 #0 [ffff88077e3abb98] machine_kexec at ffffffff8103216b
 #1 [ffff88077e3abbf8] crash_kexec at ffffffff810b8d12
 #2 [ffff88077e3abcc8] panic at ffffffff814ee999
 #3 [ffff88077e3abd48] lbug_with_loc at ffffffffa0456e1b [libcfs]
 #4 [ffff88077e3abd68] libcfs_assertion_failed at ffffffffa046042d [libcfs]
 #5 [ffff88077e3abd88] mdt_handle_common at ffffffffa0c162d9 [mdt]
 #6 [ffff88077e3abdd8] mdt_regular_handle at ffffffffa0c163f5 [mdt]
 #7 [ffff88077e3abde8] ptlrpc_main at ffffffffa0717d64 [ptlrpc]
 #8 [ffff88077e3abf48] kernel_thread at ffffffff8100c14a
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;LLNL-bugzilla-ID: 1836&lt;/p&gt;</description>
                <environment>&lt;a href=&quot;https://github.com/chaos/lustre/commits/2.1.2-3chaos&quot;&gt;https://github.com/chaos/lustre/commits/2.1.2-3chaos&lt;/a&gt;</environment>
        <key id="16248">LU-2090</key>
            <summary>mdt_req_handle() ASSERTION(h-&gt;mh_act != NULL) failed</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="2" iconUrl="https://jira.whamcloud.com/images/icons/priorities/critical.svg">Critical</priority>
                        <status id="6" iconUrl="https://jira.whamcloud.com/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="5">Cannot Reproduce</resolution>
                                        <assignee username="bzzz">Alex Zhuravlev</assignee>
                                    <reporter username="nedbass">Ned Bass</reporter>
                        <labels>
                            <label>llnl</label>
                    </labels>
                <created>Thu, 4 Oct 2012 14:30:22 +0000</created>
                <updated>Wed, 7 Oct 2015 07:41:40 +0000</updated>
                            <resolved>Wed, 7 Oct 2015 07:41:40 +0000</resolved>
                                    <version>Lustre 2.1.2</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="46010" author="bzzz" created="Thu, 4 Oct 2012 14:36:49 +0000"  >&lt;p&gt;could you print *m and *req from crash, please?&lt;/p&gt;</comment>
                            <comment id="46029" author="nedbass" created="Thu, 4 Oct 2012 19:58:19 +0000"  >&lt;p&gt;I believe I found the right pointers.  I was able to validate req by req-&amp;gt;rq_svc_thread-&amp;gt;t_pid == 7364. Also h-&amp;gt;mh_fail_id == OBD_FAIL_LLOG_CATINFO_NET which looks consistent for the name &quot;CATINFO&quot;.&lt;/p&gt;

&lt;p&gt;ptlrpc_request:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;struct ptlrpc_request {
  rq_type = 0x0,
  rq_list = {
    next = 0xffff88070dd9c808,
    prev = 0xffff88070dd9c808
  },
  rq_timed_list = {
    next = 0xffff88017ae504c0,
    prev = 0xffff88017ae504c0
  },
  rq_history_list = {
    next = 0xffff8801c4734878,
    prev = 0xffff8806f68c1c28
  },
  rq_exp_list = {
    next = 0x0,
    prev = 0x0
  },
  rq_ops = 0x0,
  rq_history_seq = 0xdda97211,
  rq_at_index = 0x4c,
  rq_status = 0x0,
  rq_lock = {
    raw_lock = {
      slock = 0x10001
    }
  },
  rq_intr = 0x0,
  rq_replied = 0x0,
  rq_err = 0x0,
  rq_timedout = 0x0,
  rq_resend = 0x0,
  rq_restart = 0x0,
  rq_replay = 0x0,
  rq_no_resend = 0x0,
  rq_waiting = 0x0,
  rq_receiving_reply = 0x0,
  rq_no_delay = 0x0,
  rq_net_err = 0x0,
  rq_wait_ctx = 0x0,
  rq_early = 0x0,
  rq_must_unlink = 0x0,
  rq_fake = 0x0,
  rq_memalloc = 0x0,
  rq_packed_final = 0x0,
  rq_hp = 0x0,
  rq_at_linked = 0x1,
  rq_reply_truncate = 0x0,
  rq_committed = 0x0,
  rq_invalid_rqset = 0x0,
  rq_phase = 3955285507,
  rq_next_phase = 0,
  rq_refcount = {
    counter = 0x1
  },
  rq_svc_thread = 0xffff88069183a1c0,
  rq_request_portal = 0x0,
  rq_reply_portal = 0x0,
  rq_nob_received = 0x0,
  rq_reqlen = 0xe0,
  rq_reqmsg = 0xffff8801c4780600,
  rq_replen = 0x0,
  rq_repmsg = 0x0,
  rq_transno = 0x0,
  rq_xid = 0x505a44c0930ad,
  rq_replay_list = {
    next = 0x0,
    prev = 0x0
  },
  rq_cli_ctx = 0x0,
  rq_svc_ctx = 0xffffffffa07aff70,
  rq_ctx_chain = {
    next = 0x0,
    prev = 0x0
  },
  rq_flvr = {
    sf_rpc = 0x0,
    sf_flags = 0x0,
    u_rpc = {&amp;lt;No data fields&amp;gt;},
    u_bulk = {
      hash = {
        hash_alg = 0x0
      }
    }
  },
  rq_sp_from = LUSTRE_SP_CLI,
  rq_ctx_init = 0x0,
  rq_ctx_fini = 0x0,
  rq_bulk_read = 0x0,
  rq_bulk_write = 0x0,
  rq_auth_gss = 0x0,
  rq_auth_remote = 0x0,
  rq_auth_usr_root = 0x0,
  rq_auth_usr_mdt = 0x0,
  rq_auth_usr_ost = 0x0,
  rq_pack_udesc = 0x0,
  rq_pack_bulk = 0x0,
  rq_no_reply = 0x0,
  rq_pill_init = 0x1,
  rq_auth_uid = 0xffffffff,
  rq_auth_mapped_uid = 0xffffffff,
  rq_user_desc = 0x0,
  rq_reply_off = 0x0,
  rq_reqbuf = 0xffff8801c4780600,
  rq_reqbuf_len = 0x0,
  rq_reqdata_len = 0xe0,
  rq_repbuf = 0x0,
  rq_repbuf_len = 0x0,
  rq_repdata = 0x0,
  rq_repdata_len = 0x0,
  rq_clrbuf = 0x0,
  rq_clrbuf_len = 0x0,
  rq_clrdata_len = 0x0,
  rq_req_swab_mask = 0x0,
  rq_rep_swab_mask = 0x0,
  rq_import_generation = 0x0,
  rq_send_state = 0,
  rq_early_count = 0x0,
  rq_req_md_h = {
    cookie = 0x0
  },
  rq_req_cbid = {
    cbid_fn = 0,
    cbid_arg = 0x0
  },
  rq_delay_limit = 0x0,
  rq_queued_time = 0x0,
  rq_arrival_time = {
    tv_sec = 0x506dbaa0,
    tv_usec = 0xb26ad
  },
  rq_reply_state = 0x0,
  rq_rqbd = 0xffff8801c4734800,
  rq_reply_md_h = {
    cookie = 0x0
  },
  rq_reply_waitq = {
    lock = {
      raw_lock = {
        slock = 0x0
      }
    },
    task_list = {
      next = 0x0,
      prev = 0x0
    }
  },
  rq_reply_cbid = {
    cbid_fn = 0,
    cbid_arg = 0x0
  },
  rq_self = 0x20000ac103cc8,
  rq_peer = {
    nid = 0x50005c0a8729b,
    pid = 0x3039
  },
  rq_export = 0xffff8803af54c800,
  rq_import = 0x0,
  rq_replay_cb = 0,
  rq_commit_cb = 0,
  rq_cb_data = 0x0,
  rq_bulk = 0x0,
  rq_sent = 0x0,
  rq_real_sent = 0x0,
  rq_deadline = 0x506dbadd,
  rq_reply_deadline = 0x0,
  rq_bulk_deadline = 0x0,
  rq_timeout = 0x0,
  rq_set_chain = {
    next = 0x0,
    prev = 0x0
  },
  rq_set_waitq = {
    lock = {
      raw_lock = {
        slock = 0x0
      }
    },
    task_list = {
      next = 0x0,
      prev = 0x0
    }
  },
  rq_set = 0x0,
  rq_interpret_reply = 0,
  rq_async_args = {
    pointer_arg = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
    space = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0}
  },
  rq_pool = 0x0,
  rq_session = {
    lc_tags = 0xa0000010,
    lc_thread = 0xffff88069183a1c0,
    lc_value = 0xffff88069f8ee380,
    lc_state = LCS_ENTERED,
    lc_remember = {
      next = 0xffff880584afdb60,
      prev = 0xffffffffa0625fb0
    },
    lc_version = 0x25,
    lc_cookie = 0x5
  },
  rq_recov_session = {
    lc_tags = 0x0,
    lc_thread = 0x0,
    lc_value = 0x0,
    lc_state = 0,
    lc_remember = {
      next = 0x0,
      prev = 0x0
    },
    lc_version = 0x0,
    lc_cookie = 0x0
  },
  rq_pill = {
    rc_req = 0xffff88070dd9c800,
    rc_fmt = 0x0,
    rc_loc = RCL_SERVER,
    rc_area = {{0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff}, {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff}}
  }
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;


&lt;p&gt;mdt_handler:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;struct mdt_handler {
  mh_name = 0xffffffffa0c4459b &quot;CATINFO&quot;, 
  mh_fail_id = 0x1309, 
  mh_opc = 0x1fb, 
  mh_flags = 0x0, 
  mh_act = 0, 
  mh_fmt = 0x0
}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="129671" author="bzzz" created="Wed, 7 Oct 2015 07:41:40 +0000"  >&lt;p&gt;the issue doesn&apos;t seem to happen? I wasn&apos;t able to reproduce that.&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                            <attachment id="11948" name="pigs-mds1-console-log.txt" size="64440" author="nedbass" created="Thu, 4 Oct 2012 14:30:22 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzv54v:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>4365</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>