<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:22:43 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-9038] sanity-hsm: kernel panic in lbcfs_kkuc_group_put()</title>
                <link>https://jira.whamcloud.com/browse/LU-9038</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for Quentin Bouget &amp;lt;quentin.bouget@cea.fr&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://testing.hpdd.intel.com/test_sets/7da4faf0-df0c-11e6-b490-5254006e85c2&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;patch_for_lu-8911&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;test_1a crashes the Client 2 on Maloo with a very low failure rate (&amp;lt;5%)&lt;br/&gt;
 Following tests fail with &quot;&apos;/mnt/lustre2&apos; not mounted&quot; and eventually sanity-hsm times out.&lt;/p&gt;

&lt;p&gt;There seems to be a race in libcfs_kkuc_group_put().&lt;/p&gt;

&lt;p&gt;I currently don&apos;t have a method to reproduce.&lt;/p&gt;</description>
                <environment></environment>
        <key id="43232">LU-9038</key>
            <summary>sanity-hsm: kernel panic in lbcfs_kkuc_group_put()</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bfaccini">Bruno Faccini</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Fri, 20 Jan 2017 14:29:21 +0000</created>
                <updated>Wed, 1 Mar 2017 13:04:04 +0000</updated>
                            <resolved>Wed, 1 Mar 2017 13:04:04 +0000</resolved>
                                                    <fixVersion>Lustre 2.10.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                                            <comments>
                            <comment id="181707" author="bfaccini" created="Mon, 23 Jan 2017 09:51:04 +0000"  >&lt;p&gt;Hmm, looks like there may be some issues with the debug data...&lt;br/&gt;
crash log shows :&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[  357.915924] Lustre: DEBUG MARKER: == sanity-hsm test 1a: mmap
[  358.280929] Lustre: DEBUG MARKER: rm -rf /tmp/arc1/shsm/*
[  358.531174] Lustre: DEBUG MARKER: mkdir -p /tmp/arc1/shsm/
[  358.775523] Lustre: DEBUG MARKER: lhsmtool_posix  --daemon --hsm-root /tmp/arc1/shsm --bandwidth 1 /mnt/lustre2 &amp;lt; /dev/null &amp;gt; /logdir/test_logs/2017-01-20/lustre-reviews-el7-x86_64--review-dne-part-1--1_101_1__43791__-70022698709400-10
1610/sanity-hsm.test_1a.copytool_log.trevis-49vm2.log 
[    2.208513] intel_powerclamp: No package C-state available
[  358.915126] BUG: unable to handle kernel 
[  358.916053] NULL pointer dereference at 0000000000000018
[  358.916053] IP: [&amp;lt;ffffffffa07b6a1d&amp;gt;] libcfs_kkuc_group_put+0x6d/0x190 [obdclass]
[  358.916053] PGD 0 
[  358.916053] Oops: 0000 [#1] SMP 
[  358.916053] Modules linked in: osc(OE) mgc(OE) lustre(OE) lmv(OE) fld(OE) mdc(OE) fid(OE) lov(OE) ksocklnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) sha512_ssse3 sha512_generic crypto_null libcfs(OE) rpcsec_gss_krb5 nfsv4 dns_resolver nfs f
scache rpcrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod crc_t10dif crct10dif_generic ib_srp scsi_transport_srp scsi_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_core iosf_mbi crc32_pclmul ghash_clmulni_intel ppdev aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr nfsd virtio_balloon i2c_piix4 parport_pc parport nfs_acl lockd auth_rpcgss grace sunrpc ip_tables ext4 mbcache jbd2 ata_generic pata_acpi 8139too virtio_blk cirrus crct10dif_pclmul crct10dif_common crc32c_intel drm_kms_helper
[  358.916053]  syscopyarea sysfillrect sysimgblt fb_sys_fops ata_piix serio_raw ttm virtio_pci virtio_ring 8139cp virtio mii drm libata i2c_core floppy
[  358.916053] CPU: 1 PID: 5090 Comm: ldlm_cb00_001 Tainted: G           OE  ------------   3.10.0-514.2.2.el7.x86_64 #1
[  358.916053] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2007
[  358.916053] task: ffff880079930fb0 ti: ffff88007818c000 task.ti: ffff88007818c000
[  358.916053] RIP: 0010:[&amp;lt;ffffffffa07b6a1d&amp;gt;]  [&amp;lt;ffffffffa07b6a1d&amp;gt;] libcfs_kkuc_group_put+0x6d/0x190 [obdclass]
[  358.916053] RSP: 0018:ffff88007818fc10  EFLAGS: 00010246
[  358.916053] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff88007818ffd8
[  358.916053] RDX: 0000000000000000 RSI: ffff88007a8f86c0 RDI: ffffffffa07e9820
[  358.916053] RBP: ffff88007818fc38 R08: 0000000000000064 R09: 0000000000000000
[  358.916053] R10: ffff88007a849cbe R11: ffff88007818fa8e R12: ffffffffa08bf2a0
[  358.916053] R13: ffff88007a8f86c0 R14: 0000000000000000 R15: ffff88007b015000
[  358.916053] FS:  0000000000000000(0000) GS:ffff88007fd00000(0000) knlGS:0000000000000000
[  358.916053] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  358.916053] CR2: 0000000000000018 CR3: 000000007b65d000 CR4: 00000000000406e0
[  358.916053] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  358.916053] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[  358.916053] Stack:
[  358.916053]  0000000000000030 0000000000000078 0000000000000009 ffff880046f51400
[  358.916053]  ffff88007a8f86c0 ffff88007818fc88 ffffffffa0a8ee3d 0000000000000002
[  358.916053]  ffff880046fa4288 000000787a8f86b0 0000000000000078 ffff880046f51400
[  358.916053] Call Trace:
[  358.916053]  [&amp;lt;ffffffffa0a8ee3d&amp;gt;] mdc_set_info_async+0x7ad/0x820 [mdc]
[  358.916053]  [&amp;lt;ffffffffa0962160&amp;gt;] ldlm_callback_handler.part.9+0x1750/0x2110 [ptlrpc]
[  358.916053]  [&amp;lt;ffffffffa064d5b7&amp;gt;] ? libcfs_debug_msg+0x57/0x80 [libcfs]
[  358.916053]  [&amp;lt;ffffffffa0962b57&amp;gt;] ldlm_callback_handler+0x37/0xd0 [ptlrpc]
[  358.916053]  [&amp;lt;ffffffffa098d86b&amp;gt;] ptlrpc_server_handle_request+0x21b/0xa90 [ptlrpc]
[  358.916053]  [&amp;lt;ffffffffa098b428&amp;gt;] ? ptlrpc_wait_event+0x98/0x340 [ptlrpc]
[  358.916053]  [&amp;lt;ffffffff810c4ff2&amp;gt;] ? default_wake_function+0x12/0x20
[  358.916053]  [&amp;lt;ffffffff810ba238&amp;gt;] ? __wake_up_common+0x58/0x90
[  358.916053]  [&amp;lt;ffffffffa0991920&amp;gt;] ptlrpc_main+0xaa0/0x1de0 [ptlrpc]
[  358.916053]  [&amp;lt;ffffffffa0990e80&amp;gt;] ? ptlrpc_register_service+0xe40/0xe40 [ptlrpc]
[  358.916053]  [&amp;lt;ffffffff810b064f&amp;gt;] kthread+0xcf/0xe0
[  358.916053]  [&amp;lt;ffffffff810b0580&amp;gt;] ? kthread_create_on_node+0x140/0x140
[  358.916053]  [&amp;lt;ffffffff81696718&amp;gt;] ret_from_fork+0x58/0x90
[  358.916053]  [&amp;lt;ffffffff810b0580&amp;gt;] ? kthread_create_on_node+0x140/0x140
[  358.916053] Code: e8 39 40 ed e0 49 8b 1c 24 4c 39 e3 0f 84 25 01 00 00 45 31 f6 31 c0 eb 12 0f 1f 44 00 00 83 f8 e0 74 6b 48 8b 1b 4c 39 e3 74 23 &amp;lt;48&amp;gt; 8b 7b 18 48 85 ff 74 ef 4c 89 ee e8 32 fd ff ff 85 c0 75 de 
[  358.916053] RIP  [&amp;lt;ffffffffa07b6a1d&amp;gt;] libcfs_kkuc_group_put+0x6d/0x190 [obdclass]
[  358.916053]  RSP &amp;lt;ffff88007818fc10&amp;gt;
[  358.916053] CR2: 0000000000000018
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;when disassembly shows :&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;   0xffffffffa0a8ee20 &amp;lt;mdc_set_info_async+1936&amp;gt;:        and    $0x8,%al
   0xffffffffa0a8ee22 &amp;lt;mdc_set_info_async+1938&amp;gt;:        mov    0xc(%r14),%eax
   0xffffffffa0a8ee26 &amp;lt;mdc_set_info_async+1942&amp;gt;:        mov    %eax,(%rsp)
   0xffffffffa0a8ee29 &amp;lt;mdc_set_info_async+1945&amp;gt;:        xor    %eax,%eax
   0xffffffffa0a8ee2b &amp;lt;mdc_set_info_async+1947&amp;gt;:        callq  0xffffffffa064d560 &amp;lt;libcfs_debug_msg&amp;gt;
   0xffffffffa0a8ee30 &amp;lt;mdc_set_info_async+1952&amp;gt;:        mov    %r14,%rsi
   0xffffffffa0a8ee33 &amp;lt;mdc_set_info_async+1955&amp;gt;:        mov    $0x2,%edi
   0xffffffffa0a8ee38 &amp;lt;mdc_set_info_async+1960&amp;gt;:        callq  0xffffffffa07b69b0 &amp;lt;libcfs_kkuc_msg_put+256&amp;gt;
   0xffffffffa0a8ee3d &amp;lt;mdc_set_info_async+1965&amp;gt;:        testb  $0x1,-0x424768(%rip)        # 0xffffffffa066a6dc &amp;lt;libcfs_debug&amp;gt;
   0xffffffffa0a8ee44 &amp;lt;mdc_set_info_async+1972&amp;gt;:        je     0xffffffffa0a8e780 &amp;lt;mdc_set_info_async+240&amp;gt;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="181880" author="bfaccini" created="Tue, 24 Jan 2017 10:22:40 +0000"  >&lt;p&gt;I have continued the crash-dump analysis, showing more strange debug stuff, but also some interesting info ... :&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;   0xffffffffa07b69b0 &amp;lt;libcfs_kkuc_msg_put+256&amp;gt;:        data32 data32 data32 xchg %ax,%ax
   0xffffffffa07b69b5 &amp;lt;libcfs_kkuc_msg_put+261&amp;gt;:        push   %rbp
   0xffffffffa07b69b6 &amp;lt;libcfs_kkuc_msg_put+262&amp;gt;:        mov    %rsp,%rbp
   0xffffffffa07b69b9 &amp;lt;libcfs_kkuc_msg_put+265&amp;gt;:        push   %r14
   0xffffffffa07b69bb &amp;lt;libcfs_kkuc_msg_put+267&amp;gt;:        push   %r13
   0xffffffffa07b69bd &amp;lt;libcfs_kkuc_msg_put+269&amp;gt;:        mov    %rsi,%r13
   0xffffffffa07b69c0 &amp;lt;libcfs_kkuc_msg_put+272&amp;gt;:        push   %r12
   0xffffffffa07b69c2 &amp;lt;libcfs_kkuc_msg_put+274&amp;gt;:        movslq %edi,%r12
   0xffffffffa07b69c5 &amp;lt;libcfs_kkuc_msg_put+277&amp;gt;:        push   %rbx
   0xffffffffa07b69c6 &amp;lt;libcfs_kkuc_msg_put+278&amp;gt;:        sub    $0x8,%rsp
   0xffffffffa07b69ca &amp;lt;libcfs_kkuc_msg_put+282&amp;gt;:        testb  $0x1,-0x14c2f5(%rip)        # 0xffffffffa066a6dc &amp;lt;libcfs_debug&amp;gt;
   0xffffffffa07b69d1 &amp;lt;libcfs_kkuc_msg_put+289&amp;gt;:        je     0xffffffffa07b69e0 &amp;lt;libcfs_kkuc_msg_put+304&amp;gt;
   0xffffffffa07b69d3 &amp;lt;libcfs_kkuc_msg_put+291&amp;gt;:        testb  $0x20,-0x14c2fa(%rip)        # 0xffffffffa066a6e0 &amp;lt;libcfs_subsystem_debug&amp;gt;
   0xffffffffa07b69da &amp;lt;libcfs_kkuc_msg_put+298&amp;gt;:        jne    0xffffffffa07b6ad0 &amp;lt;libcfs_kkuc_msg_put+544&amp;gt;
   0xffffffffa07b69e0 &amp;lt;libcfs_kkuc_msg_put+304&amp;gt;:        shl    $0x4,%r12 &amp;lt;&amp;lt;&amp;lt; index * 16
   0xffffffffa07b69e4 &amp;lt;libcfs_kkuc_msg_put+308&amp;gt;:        mov    $0xffffffffa07e9820,%rdi  &amp;lt;&amp;lt;&amp;lt; kg_sem
   0xffffffffa07b69eb &amp;lt;libcfs_kkuc_msg_put+315&amp;gt;:        add    $0xffffffffa08bf280,%r12  &amp;lt;&amp;lt;&amp;lt;&amp;lt;&amp;lt;&amp;lt;&amp;lt; kkuc_groups[KUC_GRP_HSM]
   0xffffffffa07b69f2 &amp;lt;libcfs_kkuc_msg_put+322&amp;gt;:        callq  0xffffffff8168aa30  &amp;lt;schedule_hrtimeout_range_clock+112&amp;gt;  &amp;lt;&amp;lt;&amp;lt;&amp;lt;&amp;lt;&amp;lt; down_read(kg_sem)
   0xffffffffa07b69f7 &amp;lt;libcfs_kkuc_msg_put+327&amp;gt;:        mov    (%r12),%rbx
   0xffffffffa07b69fb &amp;lt;libcfs_kkuc_msg_put+331&amp;gt;:        cmp    %r12,%rbx
   0xffffffffa07b69fe &amp;lt;libcfs_kkuc_msg_put+334&amp;gt;:        je     0xffffffffa07b6b29 &amp;lt;libcfs_kkuc_group_put+41&amp;gt;
   0xffffffffa07b6a04 &amp;lt;libcfs_kkuc_msg_put+340&amp;gt;:        xor    %r14d,%r14d
   0xffffffffa07b6a07 &amp;lt;libcfs_kkuc_msg_put+343&amp;gt;:        xor    %eax,%eax
   0xffffffffa07b6a09 &amp;lt;libcfs_kkuc_msg_put+345&amp;gt;:        jmp    0xffffffffa07b6a1d &amp;lt;libcfs_kkuc_msg_put+365&amp;gt;
   0xffffffffa07b6a0b &amp;lt;libcfs_kkuc_msg_put+347&amp;gt;:        nopl   0x0(%rax,%rax,1)
   0xffffffffa07b6a10 &amp;lt;libcfs_kkuc_msg_put+352&amp;gt;:        cmp    $0xffffffe0,%eax
   0xffffffffa07b6a13 &amp;lt;libcfs_kkuc_msg_put+355&amp;gt;:        je     0xffffffffa07b6a80 &amp;lt;libcfs_kkuc_msg_put+464&amp;gt;
   0xffffffffa07b6a15 &amp;lt;libcfs_kkuc_msg_put+357&amp;gt;:        mov    (%rbx),%rbx
   0xffffffffa07b6a18 &amp;lt;libcfs_kkuc_msg_put+360&amp;gt;:        cmp    %r12,%rbx
   0xffffffffa07b6a1b &amp;lt;libcfs_kkuc_msg_put+363&amp;gt;:        je     0xffffffffa07b6a40 &amp;lt;libcfs_kkuc_msg_put+400&amp;gt;
   0xffffffffa07b6a1d &amp;lt;libcfs_kkuc_msg_put+365&amp;gt;:        mov    0x18(%rbx),%rdi    &amp;lt;&amp;lt;&amp;lt;&amp;lt;&amp;lt;&amp;lt; Oops!!

crash&amp;gt; p/x kkuc_groups[2]
$15 = {
  next = 0x0, 
  prev = 0x0
}
crash&amp;gt; p/x kkuc_groups
$16 = {{
    next = 0x0, 
    prev = 0x0
  }, {
    next = 0x0, 
    prev = 0x0
  }, {
    next = 0x0, 
    prev = 0x0
  }}
crash&amp;gt; p/x &amp;amp;kkuc_groups
$17 = 0xffffffffa08bf300
crash&amp;gt; rd 0xffffffffa08bf280 20
ffffffffa08bf280:  0000000000000000 0000000000000000   ................
ffffffffa08bf290:  0000000000000000 0000000000000000   ................
ffffffffa08bf2a0:  0000000000000000 0000000000000000   ................
ffffffffa08bf2b0:  0000000000000000 0000000000000000   ................
ffffffffa08bf2c0:  0000000000000000 0000000000000000   ................
ffffffffa08bf2d0:  0000000000000000 0000000000000000   ................
ffffffffa08bf2e0:  0000000000000000 0000000000000000   ................
ffffffffa08bf2f0:  0000000000000000 0000000000000000   ................
ffffffffa08bf300:  0000000000000000 0000000000000000   ................
ffffffffa08bf310:  0000000000000000 0000000000000000   ................
crash&amp;gt;


crash&amp;gt; ps | grep lhsm
   8218      1   0  ffff8800788a5e20  IN   0.0   11716    360  lhsmtool_posix
crash&amp;gt; bt 8218
PID: 8218   TASK: ffff8800788a5e20  CPU: 0   COMMAND: &quot;lhsmtool_posix&quot;
 #0 [ffff88004e15f890] __schedule at ffffffff8168b245
 #1 [ffff88004e15f908] schedule_timeout at ffffffff81689225
    RIP: 00007f3faf6dd507  RSP: 00007ffe508050b8  RFLAGS: 00010206
    RAX: 0000000000000010  RBX: ffffffff816967c9  RCX: 00007f3fafbc2614
    RDX: 00000000009fa030  RSI: 00000000401866d5  RDI: 0000000000000005
    RBP: 00007ffe50806991   R8: ffffffffffffffff   R9: 0000000000100008
    R10: 00007ffe50804e40  R11: 0000000000000206  R12: 00000000009fa010
    R13: 00000000009fa060  R14: 0000000000000000  R15: 000000000061a7a0
    ORIG_RAX: 0000000000000010  CS: 0033  SS: 002b
crash&amp;gt; 
crash&amp;gt; bt -t 8218
PID: 8218   TASK: ffff8800788a5e20  CPU: 0   COMMAND: &quot;lhsmtool_posix&quot;
              START: __schedule at ffffffff8168b245
  [ffff88004e15f8f8] __schedule at ffffffff8168b899
  [ffff88004e15f908] schedule_timeout at ffffffff81689225
  [ffff88004e15f940] process_timeout at ffffffff81096940
  [ffff88004e15f9b8] ptlrpc_set_wait at ffffffffa0977170 [ptlrpc]
  [ffff88004e15fa10] default_wake_function at ffffffff810c4fe0
  [ffff88004e15fa60] ptlrpc_queue_wait at ffffffffa097762d [ptlrpc]
  [ffff88004e15fa80] mdc_ioc_hsm_ct_register at ffffffffa0a8a195 [mdc]
  [ffff88004e15faa8] mdc_iocontrol at ffffffffa0a9354b [mdc]
  [ffff88004e15fab8] libcfs_debug_vmsg2 at ffffffffa064d0f8 [libcfs]
  [ffff88004e15faf8] vsnprintf at ffffffff81324764
  [ffff88004e15fb48] cfs_trace_unlock_tcd at ffffffffa0642395 [libcfs]
  [ffff88004e15fba0] lmv_iocontrol at ffffffffa0b36b0d [lmv]
  [ffff88004e15fbe0] security_capset at ffffffff812a8548
  [ffff88004e15fc60] ll_inode_permission at ffffffffa0b7e843 [lustre]
  [ffff88004e15fce8] copy_and_ioctl.constprop.26 at ffffffffa0b643fb [lustre]
  [ffff88004e15fd18] ll_dir_ioctl at ffffffffa0b6a734 [lustre]
  [ffff88004e15fd60] ext4_filemap_fault at ffffffffa01c0f71 [ext4]
  [ffff88004e15fdc0] unlock_page at ffffffff811806bb
  [ffff88004e15fdd8] do_read_fault at ffffffff811ac834
  [ffff88004e15fe28] handle_mm_fault at ffffffff811b0f21
  [ffff88004e15fe50] security_file_permission at ffffffff812a8d76
  [ffff88004e15fe60] get_empty_filp at ffffffff81200426
  [ffff88004e15feb8] do_vfs_ioctl at ffffffff81212035
  [ffff88004e15fec0] spurious_fault at ffffffff81691cb1
  [ffff88004e15ff30] sys_ioctl at ffffffff812122b1
  [ffff88004e15ff80] native_usergs_sysret64 at ffffffff816967c9
    RIP: 00007f3faf6dd507  RSP: 00007ffe508050b8  RFLAGS: 00010206
    RAX: 0000000000000010  RBX: ffffffff816967c9  RCX: 00007f3fafbc2614
    RDX: 00000000009fa030  RSI: 00000000401866d5  RDI: 0000000000000005
    RBP: 00007ffe50806991   R8: ffffffffffffffff   R9: 0000000000100008
    R10: 00007ffe50804e40  R11: 0000000000000206  R12: 00000000009fa010
    R13: 00000000009fa060  R14: 0000000000000000  R15: 000000000061a7a0
    ORIG_RAX: 0000000000000010  CS: 0033  SS: 002b
crash&amp;gt; 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;So it seems that CDT has started to send requests to CT before the Kernel-side of KUC &quot;pipe&quot;, for KUC_GRP_HSM group, had a chance to be initialized ....&lt;/p&gt;

&lt;p&gt;BTW, DNE environment could be a good reason for an additional delay to get this done in lmv_hsm_ct_register(), as it was already to trigger the problem detailed in &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8136&quot; title=&quot;sanity-hsm test_9 fails with &amp;#39;request on 0x200000405:0x4:0x0 is not SUCCEED on mds1&amp;#39; &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8136&quot;&gt;&lt;del&gt;LU-8136&lt;/del&gt;&lt;/a&gt;.&lt;br/&gt;
So 2nd patch (&lt;a href=&quot;https://review.whamcloud.com/24542/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/24542/&lt;/a&gt;) for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8136&quot; title=&quot;sanity-hsm test_9 fails with &amp;#39;request on 0x200000405:0x4:0x0 is not SUCCEED on mds1&amp;#39; &quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8136&quot;&gt;&lt;del&gt;LU-8136&lt;/del&gt;&lt;/a&gt; (where you are an active reviewer!), to allow CT to have enough time to register with all MDTs before any HSM request could be sent, should already be a good work-around at least in the auto-tests environment (i.e., using copytool_setup() helper/function). But a more definitive fix is required to avoid such crashes.&lt;/p&gt;

&lt;p&gt;As a first way to fix, I think that in lmv_hsm_ct_register() the KUC kernel side should be initialized before the CT registration with all the MDTs, and then to be cleared upon problem/error.&lt;br/&gt;
Will push a patch in this direction soon.&lt;/p&gt;</comment>
                            <comment id="181927" author="gerrit" created="Tue, 24 Jan 2017 15:33:40 +0000"  >&lt;p&gt;Faccini Bruno (bruno.faccini@intel.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/25050&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/25050&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9038&quot; title=&quot;sanity-hsm: kernel panic in lbcfs_kkuc_group_put()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9038&quot;&gt;&lt;del&gt;LU-9038&lt;/del&gt;&lt;/a&gt; obdclass: handle early requests vs CT registering&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 5379e798c6bcaeb86aacdec689d6dad83e663849&lt;/p&gt;</comment>
                            <comment id="181930" author="bfaccini" created="Tue, 24 Jan 2017 15:43:36 +0000"  >&lt;p&gt;In fact, I changed my mind and &lt;a href=&quot;https://review.whamcloud.com/25050&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/25050&lt;/a&gt; allows libcfs_kkuc_group_put() to detect if entry in kkuc_groups[] is initialized and if not returns an error, relying on CDT to retry request later.&lt;/p&gt;</comment>
                            <comment id="182074" author="bougetq" created="Wed, 25 Jan 2017 14:21:44 +0000"  >&lt;p&gt;To me this is another motivation for LU-7659. But I agree a temporary fix is needed.&lt;/p&gt;</comment>
                            <comment id="183545" author="bougetq" created="Mon, 6 Feb 2017 13:47:31 +0000"  >&lt;p&gt;For the record, why did you choose to modify libcfs_kkuc_group_put() instead of lmv_hsm_ct_register()?&lt;/p&gt;</comment>
                            <comment id="183570" author="bfaccini" created="Mon, 6 Feb 2017 16:59:24 +0000"  >&lt;p&gt;Well I found this way looking more accurate regarding other parts/checks of/in the KKUC layer. Also it should minimize the window where retries will have to occur due to possible CT-side initialization delays.&lt;/p&gt;</comment>
                            <comment id="186553" author="gerrit" created="Wed, 1 Mar 2017 05:11:20 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/25050/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/25050/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-9038&quot; title=&quot;sanity-hsm: kernel panic in lbcfs_kkuc_group_put()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-9038&quot;&gt;&lt;del&gt;LU-9038&lt;/del&gt;&lt;/a&gt; obdclass: handle early requests vs CT registering&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 5571c9a73b7de3fd082fb62ab3738afc4325cbd9&lt;/p&gt;</comment>
                            <comment id="186589" author="pjones" created="Wed, 1 Mar 2017 13:04:04 +0000"  >&lt;p&gt;Landed for 2.10&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzz18f:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>