<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:59:48 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-13264] Multiple &quot;(cl_object.c:962:cl_env_percpu_put()) ASSERTION( cle == &amp;cl_env_percpu[cpu] ) failed&quot; cases triggered by cl_env_percpu_put() in osc_lock_upcall() context, with recent master configured with USE_LU_REF defined</title>
                <link>https://jira.whamcloud.com/browse/LU-13264</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;These problems have occurred when running &quot;auster sanity&quot;, with latest master configured with USE_LU_REF defined (&quot;configure --enable-lu_ref&quot;).&lt;br/&gt;
Here are the logs and the backtraces for these slightly different cases/backtraces with the same crash/LBUG.&lt;/p&gt;

&lt;p&gt;FIRST ONE :&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash&amp;gt; dmesg | less
&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;
[16713.715520] Lustre: DEBUG MARKER: == sanity test 27Cd: test maximum stripe count ======================================================= 16:39:08 (1582043948)
[16714.015089] Lustre: 14985:0:(osd_internal.h:1247:osd_trans_exec_op()) lustre-MDT0000: opcode 7: before 512 &amp;lt; left 2010, rollback = 7
[16714.032739] Lustre: 14985:0:(osd_handler.c:1807:osd_trans_dump_creds())   create: 0/0/0, destroy: 0/0/0
[16714.045410] Lustre: 14985:0:(osd_handler.c:1807:osd_trans_dump_creds()) Skipped 5 previous similar messages
[16714.058317] Lustre: 14985:0:(osd_handler.c:1814:osd_trans_dump_creds())   attr_set: 0/0/0, xattr_set: 1/89/0
[16714.071167] Lustre: 14985:0:(osd_handler.c:1814:osd_trans_dump_creds()) Skipped 5 previous similar messages
[16714.083794] Lustre: 14985:0:(osd_handler.c:1824:osd_trans_dump_creds())   write: 2001/2010/0, punch: 0/0/0, quota 3/3/0
[16714.097491] Lustre: 14985:0:(osd_handler.c:1824:osd_trans_dump_creds()) Skipped 5 previous similar messages
[16714.109917] Lustre: 14985:0:(osd_handler.c:1831:osd_trans_dump_creds())   insert: 0/0/0, delete: 0/0/0
[16714.121753] Lustre: 14985:0:(osd_handler.c:1831:osd_trans_dump_creds()) Skipped 5 previous similar messages
[16714.133993] Lustre: 14985:0:(osd_handler.c:1838:osd_trans_dump_creds())   ref_add: 0/0/0, ref_del: 0/0/0
[16714.145902] Lustre: 14985:0:(osd_handler.c:1838:osd_trans_dump_creds()) Skipped 5 previous similar messages
[16714.691028] LustreError: 146480:0:(cl_object.c:629:cl_env_init0()) ASSERTION( cle-&amp;gt;ce_ref == 0 ) failed: 
[16714.694227] LustreError: 146479:0:(cl_object.c:962:cl_env_percpu_put()) ASSERTION( cle == &amp;amp;cl_env_percpu[cpu] ) failed: 
[16714.694230] LustreError: 146479:0:(cl_object.c:962:cl_env_percpu_put()) LBUG
[16714.694234] Pid: 146479, comm: ptlrpcd_01_29 3.10.0-862.14.4.el7_lustre_ClientSymlink_279c264.x86_64 #1 SMP Thu Oct 17 10:54:24 UTC 2019
[16714.694234] Call Trace:
[16714.694276]  [&amp;lt;ffffffffc09a20ec&amp;gt;] libcfs_call_trace+0x8c/0xc0 [libcfs]
[16714.694289]  [&amp;lt;ffffffffc09a219c&amp;gt;] lbug_with_loc+0x4c/0xa0 [libcfs]
[16714.694350]  [&amp;lt;ffffffffc0ad1ade&amp;gt;] cl_env_percpu_put+0x6e/0xa0 [obdclass]
[16714.694371]  [&amp;lt;ffffffffc0fa816c&amp;gt;] osc_lock_upcall+0x9c/0x5f0 [osc]
[16714.694384]  [&amp;lt;ffffffffc0f9c432&amp;gt;] osc_enqueue_fini+0xb2/0x210 [osc]
[16714.694396]  [&amp;lt;ffffffffc0f9c6a0&amp;gt;] osc_enqueue_interpret+0x110/0x340 [osc]
[16714.694477]  [&amp;lt;ffffffffc0da6e70&amp;gt;] ptlrpc_check_set.part.23+0x480/0x1de0 [ptlrpc]
[16714.694537]  [&amp;lt;ffffffffc0da882b&amp;gt;] ptlrpc_check_set+0x5b/0xe0 [ptlrpc]
[16714.694606]  [&amp;lt;ffffffffc0dd55eb&amp;gt;] ptlrpcd_check+0x4ab/0x590 [ptlrpc]
[16714.694669]  [&amp;lt;ffffffffc0dd596b&amp;gt;] ptlrpcd+0x29b/0x550 [ptlrpc]
[16714.694676]  [&amp;lt;ffffffff844bdf21&amp;gt;] kthread+0xd1/0xe0
[16714.694683]  [&amp;lt;ffffffff84b255f7&amp;gt;] ret_from_fork_nospec_end+0x0/0x39
[16714.694696]  [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
[16714.694698] Kernel panic - not syncing: LBUG
[16714.694703] CPU: 29 PID: 146479 Comm: ptlrpcd_01_29 Kdump: loaded Tainted: G          IOE  ------------   3.10.0-862.14.4.el7_lustre_ClientSymlink_279c264.x86_64 #1
[16714.694705] Hardware name: Intel Corporation S2600WTT/S2600WTT, BIOS SE5C610.86B.01.01.0008.021120151325 02/11/2015
[16714.694706] Call Trace:
[16714.694712]  [&amp;lt;ffffffff84b13754&amp;gt;] dump_stack+0x19/0x1b
[16714.694718]  [&amp;lt;ffffffff84b0d29f&amp;gt;] panic+0xe8/0x21f
[16714.694734]  [&amp;lt;ffffffffc09a21eb&amp;gt;] lbug_with_loc+0x9b/0xa0 [libcfs]
[16714.694776]  [&amp;lt;ffffffffc0ad1ade&amp;gt;] cl_env_percpu_put+0x6e/0xa0 [obdclass]
[16714.694791]  [&amp;lt;ffffffffc0fa816c&amp;gt;] osc_lock_upcall+0x9c/0x5f0 [osc]
[16714.694806]  [&amp;lt;ffffffffc0fa80d0&amp;gt;] ? osc_lock_upcall_speculative+0x2e0/0x2e0 [osc]
[16714.694818]  [&amp;lt;ffffffffc0f9c432&amp;gt;] osc_enqueue_fini+0xb2/0x210 [osc]
[16714.694831]  [&amp;lt;ffffffffc0f9c6a0&amp;gt;] osc_enqueue_interpret+0x110/0x340 [osc]
[16714.694888]  [&amp;lt;ffffffffc0da6e70&amp;gt;] ptlrpc_check_set.part.23+0x480/0x1de0 [ptlrpc]
[16714.694946]  [&amp;lt;ffffffffc0da882b&amp;gt;] ptlrpc_check_set+0x5b/0xe0 [ptlrpc]
[16714.695010]  [&amp;lt;ffffffffc0dd55eb&amp;gt;] ptlrpcd_check+0x4ab/0x590 [ptlrpc]
[16714.695072]  [&amp;lt;ffffffffc0dd596b&amp;gt;] ptlrpcd+0x29b/0x550 [ptlrpc]
[16714.695077]  [&amp;lt;ffffffff844d2010&amp;gt;] ? wake_up_state+0x20/0x20
[16714.695138]  [&amp;lt;ffffffffc0dd56d0&amp;gt;] ? ptlrpcd_check+0x590/0x590 [ptlrpc]
[16714.695141]  [&amp;lt;ffffffff844bdf21&amp;gt;] kthread+0xd1/0xe0
[16714.695145]  [&amp;lt;ffffffff844bde50&amp;gt;] ? insert_kthread_work+0x40/0x40
[16714.695150]  [&amp;lt;ffffffff84b255f7&amp;gt;] ret_from_fork_nospec_begin+0x21/0x21
[16714.695153]  [&amp;lt;ffffffff844bde50&amp;gt;] ? insert_kthread_work+0x40/0x40
(END)
crash&amp;gt; bt
PID: 146479  TASK: ffff8a320a248fd0  CPU: 29  COMMAND: &quot;ptlrpcd_01_29&quot;
 #0 [ffff8a325c5039a8] machine_kexec at ffffffff84462a0a
 #1 [ffff8a325c503a08] __crash_kexec at ffffffff845166c2
 #2 [ffff8a325c503ad8] panic at ffffffff84b0d2aa
 #3 [ffff8a325c503b58] lbug_with_loc at ffffffffc09a21eb [libcfs]
 #4 [ffff8a325c503b78] cl_env_percpu_put at ffffffffc0ad1ade [obdclass]
 #5 [ffff8a325c503b88] osc_lock_upcall at ffffffffc0fa816c [osc]
 #6 [ffff8a325c503bd0] osc_enqueue_fini at ffffffffc0f9c432 [osc]
 #7 [ffff8a325c503c20] osc_enqueue_interpret at ffffffffc0f9c6a0 [osc]
 #8 [ffff8a325c503c98] ptlrpc_check_set at ffffffffc0da6e70 [ptlrpc]
 #9 [ffff8a325c503d48] ptlrpc_check_set at ffffffffc0da882b [ptlrpc]
#10 [ffff8a325c503d68] ptlrpcd_check at ffffffffc0dd55eb [ptlrpc]
#11 [ffff8a325c503db8] ptlrpcd at ffffffffc0dd596b [ptlrpc]
#12 [ffff8a325c503ec8] kthread at ffffffff844bdf21
crash&amp;gt; 
crash&amp;gt; bt 146480
PID: 146480  TASK: ffff8a320a249fa0  CPU: 65  COMMAND: &quot;ptlrpcd_01_30&quot;
 #0 [ffff8a3a5eb48e48] crash_nmi_callback at ffffffff844551b7
 #1 [ffff8a3a5eb48e58] nmi_handle at ffffffff84b1d8dc
 #2 [ffff8a3a5eb48eb0] do_nmi at ffffffff84b1dafd
 #3 [ffff8a3a5eb48ef0] end_repeat_nmi at ffffffff84b1cd79
    [exception RIP: io_serial_in+21]
    RIP: ffffffff8484aab5  RSP: ffff8a31c2a1f7d8  RFLAGS: 00000002
    RAX: ffffffff8484aa00  RBX: ffffffff85614400  RCX: 0000000000000000
    RDX: 00000000000003fd  RSI: 00000000000003fd  RDI: ffffffff85614400
    RBP: ffff8a31c2a1f7d8   R8: 0000000000000000   R9: 0000000000000000
    R10: 0000000000000000  R11: ffff8a31c2a1f6ee  R12: 0000000000002700
    R13: 0000000000000020  R14: ffffffff853c1c4d  R15: 000000000000006d
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
--- &amp;lt;NMI exception stack&amp;gt; ---
 #4 [ffff8a31c2a1f7d8] io_serial_in at ffffffff8484aab5
 #5 [ffff8a31c2a1f7e0] wait_for_xmitr at ffffffff8484b746
 #6 [ffff8a31c2a1f808] serial8250_console_putchar at ffffffff8484b92c
 #7 [ffff8a31c2a1f828] uart_console_write at ffffffff8484632c
 #8 [ffff8a31c2a1f858] serial8250_console_write at ffffffff8484b86f
 #9 [ffff8a31c2a1f8a0] call_console_drivers.constprop.17 at ffffffff84495843
#10 [ffff8a31c2a1f8c8] console_unlock at ffffffff84496fe8
#11 [ffff8a31c2a1f908] vprintk_emit at ffffffff844973f4
#12 [ffff8a31c2a1f978] vprintk_default at ffffffff84497769
#13 [ffff8a31c2a1f988] printk at ffffffff84b0d436
#14 [ffff8a31c2a1f9e8] cfs_print_to_console at ffffffffc09a2bed [libcfs]
#15 [ffff8a31c2a1fa30] libcfs_debug_msg at ffffffffc098da3c [libcfs]
#16 [ffff8a31c2a1fb60] cl_env_init0 at ffffffffc0ad19b3 [obdclass]
#17 [ffff8a31c2a1fb70] cl_env_percpu_get at ffffffffc0ad1a5e [obdclass]
#18 [ffff8a31c2a1fb88] osc_lock_upcall at ffffffffc0fa810a [osc]
#19 [ffff8a31c2a1fbd0] osc_enqueue_fini at ffffffffc0f9c432 [osc]
#20 [ffff8a31c2a1fc20] osc_enqueue_interpret at ffffffffc0f9c6a0 [osc]
#21 [ffff8a31c2a1fc98] ptlrpc_check_set at ffffffffc0da6e70 [ptlrpc]
#22 [ffff8a31c2a1fd48] ptlrpc_check_set at ffffffffc0da882b [ptlrpc]
#23 [ffff8a31c2a1fd68] ptlrpcd_check at ffffffffc0dd55eb [ptlrpc]
#24 [ffff8a31c2a1fdb8] ptlrpcd at ffffffffc0dd596b [ptlrpc]
#25 [ffff8a31c2a1fec8] kthread at ffffffff844bdf21
crash&amp;gt; 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;And last Lustre debug traces for both PIDs 146479 and 146480 clearly show that 146479 has changed from CPU #65 to #29 during its execution within osc_lock_upcall() context, and particularly in lu_ref_add() because of might_sleep(), hence the &#8220;cl_env_percpu_put()) ASSERTION( cle == &amp;amp;cl_env_percpu&lt;span class=&quot;error&quot;&gt;&amp;#91;cpu&amp;#93;&lt;/span&gt; ) failed&#8221; in cl_env_percpu_put(). Then 146480 triggers &#8220;(cl_object.c:629:cl_env_init0()) ASSERTION( cle-&amp;gt;ce_ref == 0 ) failed&#8221; on CPU #65 due to previous stuff left by 146479 before being switched to CPU #29 !! :&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00000008:00000001:65.0:1582043949.184487:0:146479:0:(osc_lock.c:274:osc_lock_upcall()) Process entered
00010000:00000001:65.0:1582043949.184489:0:146479:0:(ldlm_lock.c:599:__ldlm_handle2lock()) Process entered
00000020:00000001:65.0:1582043949.184490:0:146479:0:(lustre_handles.c:141:class_handle2object()) Process entered
00000020:00000040:65.0:1582043949.184492:0:146479:0:(lustre_handles.c:159:class_handle2object()) GET ldlm ffff8a30e1ad8500 refcount=5
00000020:00000001:65.0:1582043949.184494:0:146479:0:(lustre_handles.c:166:class_handle2object()) Process leaving (rc=18446614541282149632 : -129532427401984 : ffff8a30e1ad8500)
&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;.
00000008:00000001:65.0:1582043949.184758:0:146480:0:(osc_request.c:2395:osc_enqueue_fini()) Process entered
00000008:00000001:65.0:1582043949.184759:0:146480:0:(osc_lock.c:274:osc_lock_upcall()) Process entered
00000020:00040000:65.0:1582043949.184760:0:146480:0:(cl_object.c:629:cl_env_init0()) ASSERTION( cle-&amp;gt;ce_ref == 0 ) failed: 
00000020:00000010:29.0:1582043949.187924:0:146479:0:(lu_ref.c:157:lu_ref_add_context()) slab-alloced &apos;link&apos;: 40 at ffff8a31c26a8f50.
00010000:00000001:29.0:1582043949.187926:0:146479:0:(ldlm_lock.c:621:__ldlm_handle2lock()) Process leaving (rc=18446614541282149632 : -129532427401984 : ffff8a30e1ad8500)
00000020:00000010:29.0:1582043949.187929:0:146479:0:(lu_ref.c:255:lu_ref_del()) slab-freed &apos;link&apos;: 40 at ffff8a3a5bff5528.
00000020:00000010:29.0:1582043949.187931:0:146479:0:(lu_ref.c:157:lu_ref_add_context()) slab-alloced &apos;link&apos;: 40 at ffff8a31c26a82f8.
00000008:00000001:29.0:1582043949.187933:0:146479:0:(osc_lock.c:159:osc_lock_lvb_update()) Process entered
00000020:00000001:29.0:1582043949.187935:0:146479:0:(cl_object.c:149:cl_object_top()) ffff8a30e8a22fd0 -&amp;gt; ffff8a31cdbe7c18
00000008:00010000:29.0:1582043949.187937:0:146479:0:(osc_lock.c:197:osc_lock_lvb_update()) ### acquired size 0, setting rss=0; kms=0, end=18446744073709551615 ns: ?? lock: ffff8a30e1ad8500/0x55623c61b9c3f430 lrc: 5/2,0 mode: PR/PR res: ?? rrc=?? type: ??? flags: 0x10020000000000 nid: local remote: 0x55623c61b9c3f4a0 expref: -99 pid: 22620 timeout: 0 lvb_type: 1
00000020:00000001:29.0:1582043949.187944:0:146479:0:(cl_object.c:149:cl_object_top()) ffff8a30e8a22fd0 -&amp;gt; ffff8a31cdbe7c18
00000020:00000001:29.0:1582043949.187946:0:146479:0:(cl_object.c:238:cl_object_attr_update()) Process entered
00000008:00000020:29.0:1582043949.187947:0:146479:0:(osc_object.c:183:osc_attr_update()) set kms from 0to 0
00020000:00000001:29.0:1582043949.187949:0:146479:0:(lovsub_object.c:116:lovsub_attr_update()) Process entered
00020000:00000001:29.0:1582043949.187950:0:146479:0:(lovsub_object.c:118:lovsub_attr_update()) Process leaving (rc=0 : 0 : 0)
00000020:00000001:29.0:1582043949.187952:0:146479:0:(cl_object.c:253:cl_object_attr_update()) Process leaving (rc=0 : 0 : 0)
00000020:00000001:29.0:1582043949.187954:0:146479:0:(cl_object.c:149:cl_object_top()) ffff8a30e8a22fd0 -&amp;gt; ffff8a31cdbe7c18
00000008:00000001:29.0:1582043949.187956:0:146479:0:(osc_lock.c:202:osc_lock_lvb_update()) Process leaving
00000020:00000001:29.0:1582043949.187957:0:146479:0:(cl_io.c:1180:cl_sync_io_note()) Process entered
00000020:00000001:29.0:1582043949.187958:0:146479:0:(cl_io.c:1208:cl_sync_io_note()) Process leaving
00000020:00040000:29.0:1582043949.187959:0:146479:0:(cl_object.c:962:cl_env_percpu_put()) ASSERTION( cle == &amp;amp;cl_env_percpu[cpu] ) failed: 
00000020:00040000:29.0:1582043949.187965:0:146479:0:(cl_object.c:962:cl_env_percpu_put()) LBUG
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;SECOND ONE :&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;crash&amp;gt; sys
      KERNEL: /usr/lib/debug/lib/modules/3.10.0-862.14.4.el7_lustre_ClientSymlink_279c264.x86_64/vmlinux
    DUMPFILE: /scratch/dumps/wolf-5.wolf.hpdd.intel.com/10.8.1.5-2020-02-19-09:31:14/vmcore  [PARTIAL DUMP]
        CPUS: 72
        DATE: Wed Feb 19 09:30:56 2020
      UPTIME: 16:50:03
LOAD AVERAGE: 3.98, 2.34, 1.38
       TASKS: 1117
    NODENAME: wolf-5.wolf.hpdd.intel.com
     RELEASE: 3.10.0-862.14.4.el7_lustre_ClientSymlink_279c264.x86_64
     VERSION: #1 SMP Thu Oct 17 10:54:24 UTC 2019
     MACHINE: x86_64  (2294 Mhz)
      MEMORY: 63.9 GB
       PANIC: &quot;Kernel panic - not syncing: LBUG&quot;
crash&amp;gt; 
crash&amp;gt; dmesg | less
&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;&#8230;..
[60598.486899] Lustre: DEBUG MARKER: == sanity test 49: Change max_pages_per_rpc won&apos;t break osc extent =================================== 09:30:54 (1582104654)
[60599.708080] LustreError: 100699:0:(cl_object.c:962:cl_env_percpu_put()) ASSERTION( cle == &amp;amp;cl_env_percpu[cpu] ) failed: 
[60599.723309] LustreError: 100699:0:(cl_object.c:962:cl_env_percpu_put()) LBUG
[60599.734409] Pid: 100699, comm: dd 3.10.0-862.14.4.el7_lustre_ClientSymlink_279c264.x86_64 #1 SMP Thu Oct 17 10:54:24 UTC 2019
[60599.750031] Call Trace:
[60599.755487]  [&amp;lt;ffffffffc0b820ec&amp;gt;] libcfs_call_trace+0x8c/0xc0 [libcfs]
[60599.765526]  [&amp;lt;ffffffffc0b8219c&amp;gt;] lbug_with_loc+0x4c/0xa0 [libcfs]
[60599.774953]  [&amp;lt;ffffffffc0da6ade&amp;gt;] cl_env_percpu_put+0x6e/0xa0 [obdclass]
[60599.784833]  [&amp;lt;ffffffffc0cba16c&amp;gt;] osc_lock_upcall+0x9c/0x5f0 [osc]
[60599.794046]  [&amp;lt;ffffffffc0caee93&amp;gt;] osc_enqueue_base+0x5c3/0x6e0 [osc]
[60599.803288]  [&amp;lt;ffffffffc0cb956c&amp;gt;] osc_lock_enqueue+0x37c/0x830 [osc]
[60599.812424]  [&amp;lt;ffffffffc0dad745&amp;gt;] cl_lock_enqueue+0x65/0x120 [obdclass]
[60599.821884]  [&amp;lt;ffffffffc125a9d5&amp;gt;] lov_lock_enqueue+0x95/0x150 [lov]
[60599.830916]  [&amp;lt;ffffffffc0dad745&amp;gt;] cl_lock_enqueue+0x65/0x120 [obdclass]
[60599.840327]  [&amp;lt;ffffffffc0dadcd7&amp;gt;] cl_lock_request+0x67/0x1f0 [obdclass]
[60599.849696]  [&amp;lt;ffffffffc0db194b&amp;gt;] cl_io_lock+0xeb/0x270 [obdclass]
[60599.858545]  [&amp;lt;ffffffffc0db1cea&amp;gt;] cl_io_loop+0xba/0x1c0 [obdclass]
[60599.867366]  [&amp;lt;ffffffffc12df49a&amp;gt;] ll_file_io_generic+0x5da/0xb00 [lustre]
[60599.876880]  [&amp;lt;ffffffffc12e00e1&amp;gt;] ll_file_aio_write+0x491/0x780 [lustre]
[60599.886271]  [&amp;lt;ffffffffc12e04d0&amp;gt;] ll_file_write+0x100/0x1c0 [lustre]
[60599.895246]  [&amp;lt;ffffffffa381f240&amp;gt;] vfs_write+0xc0/0x1f0
[60599.902819]  [&amp;lt;ffffffffa382006f&amp;gt;] SyS_write+0x7f/0xf0
[60599.910247]  [&amp;lt;ffffffffa3d2579b&amp;gt;] system_call_fastpath+0x22/0x27
[60599.918717]  [&amp;lt;ffffffffffffffff&amp;gt;] 0xffffffffffffffff
[60599.925994] Kernel panic - not syncing: LBUG
[60599.932421] CPU: 18 PID: 100699 Comm: dd Kdump: loaded Tainted: G        W IOE  ------------   3.10.0-862.14.4.el7_lustre_ClientSymlink_279c264.x86_64 #1
[60599.951075] Hardware name: Intel Corporation S2600WTT/S2600WTT, BIOS SE5C610.86B.01.01.0008.021120151325 02/11/2015
[60599.964398] Call Trace:
[60599.968765]  [&amp;lt;ffffffffa3d13754&amp;gt;] dump_stack+0x19/0x1b
[60599.976118]  [&amp;lt;ffffffffa3d0d29f&amp;gt;] panic+0xe8/0x21f
[60599.983051]  [&amp;lt;ffffffffc0b821eb&amp;gt;] lbug_with_loc+0x9b/0xa0 [libcfs]
[60599.991533]  [&amp;lt;ffffffffc0da6ade&amp;gt;] cl_env_percpu_put+0x6e/0xa0 [obdclass]
[60600.000551]  [&amp;lt;ffffffffc0cba16c&amp;gt;] osc_lock_upcall+0x9c/0x5f0 [osc]
[60600.008973]  [&amp;lt;ffffffffc0caee93&amp;gt;] osc_enqueue_base+0x5c3/0x6e0 [osc]
[60600.017566]  [&amp;lt;ffffffffc0cba0d0&amp;gt;] ? osc_lock_upcall_speculative+0x2e0/0x2e0 [osc]
[60600.027395]  [&amp;lt;ffffffffc0cb956c&amp;gt;] osc_lock_enqueue+0x37c/0x830 [osc]
[60600.035953]  [&amp;lt;ffffffffc0dad745&amp;gt;] cl_lock_enqueue+0x65/0x120 [obdclass]
[60600.044755]  [&amp;lt;ffffffffc125a9d5&amp;gt;] lov_lock_enqueue+0x95/0x150 [lov]
[60600.053156]  [&amp;lt;ffffffffc0dad745&amp;gt;] cl_lock_enqueue+0x65/0x120 [obdclass]
[60600.061920]  [&amp;lt;ffffffffc0dadcd7&amp;gt;] cl_lock_request+0x67/0x1f0 [obdclass]
[60600.070642]  [&amp;lt;ffffffffc0db194b&amp;gt;] cl_io_lock+0xeb/0x270 [obdclass]
[60600.078846]  [&amp;lt;ffffffffc0db1cea&amp;gt;] cl_io_loop+0xba/0x1c0 [obdclass]
[60600.086993]  [&amp;lt;ffffffffc12df49a&amp;gt;] ll_file_io_generic+0x5da/0xb00 [lustre]
[60600.095791]  [&amp;lt;ffffffffc12e00e1&amp;gt;] ll_file_aio_write+0x491/0x780 [lustre]
[60600.104459]  [&amp;lt;ffffffffc12e04d0&amp;gt;] ll_file_write+0x100/0x1c0 [lustre]
[60600.112693]  [&amp;lt;ffffffffa381f240&amp;gt;] vfs_write+0xc0/0x1f0
[60600.119532]  [&amp;lt;ffffffffa382006f&amp;gt;] SyS_write+0x7f/0xf0
[60600.126232]  [&amp;lt;ffffffffa3d2579b&amp;gt;] system_call_fastpath+0x22/0x27
(END)
crash&amp;gt; 
crash&amp;gt; bt 100699
PID: 100699  TASK: ffff99f600b63f40  CPU: 18  COMMAND: &quot;dd&quot;
 #0 [ffff99f5e3897788] machine_kexec at ffffffffa3662a0a
 #1 [ffff99f5e38977e8] __crash_kexec at ffffffffa37166c2
 #2 [ffff99f5e38978b8] panic at ffffffffa3d0d2aa
 #3 [ffff99f5e3897938] lbug_with_loc at ffffffffc0b821eb [libcfs]
 #4 [ffff99f5e3897958] cl_env_percpu_put at ffffffffc0da6ade [obdclass]
 #5 [ffff99f5e3897968] osc_lock_upcall at ffffffffc0cba16c [osc]
 #6 [ffff99f5e38979b0] osc_enqueue_base at ffffffffc0caee93 [osc]
 #7 [ffff99f5e3897a68] osc_lock_enqueue at ffffffffc0cb956c [osc]
 #8 [ffff99f5e3897af0] cl_lock_enqueue at ffffffffc0dad745 [obdclass]
 #9 [ffff99f5e3897b28] lov_lock_enqueue at ffffffffc125a9d5 [lov]
#10 [ffff99f5e3897b68] cl_lock_enqueue at ffffffffc0dad745 [obdclass]
#11 [ffff99f5e3897ba0] cl_lock_request at ffffffffc0dadcd7 [obdclass]
#12 [ffff99f5e3897bd8] cl_io_lock at ffffffffc0db194b [obdclass]
#13 [ffff99f5e3897c20] cl_io_loop at ffffffffc0db1cea [obdclass]
#14 [ffff99f5e3897c50] ll_file_io_generic at ffffffffc12df49a [lustre]
#15 [ffff99f5e3897d58] ll_file_aio_write at ffffffffc12e00e1 [lustre]
#16 [ffff99f5e3897de0] ll_file_write at ffffffffc12e04d0 [lustre]
#17 [ffff99f5e3897ec8] vfs_write at ffffffffa381f240
#18 [ffff99f5e3897f08] sys_write at ffffffffa382006f
#19 [ffff99f5e3897f50] system_call_fastpath at ffffffffa3d2579b
    RIP: 00007fd19b2ce9b0  RSP: 00007ffcafbeda78  RFLAGS: 00000246
    RAX: 0000000000000001  RBX: 0000000000000000  RCX: ffffffffffffffff
    RDX: 0000000000001000  RSI: 0000000000912000  RDI: 0000000000000001
    RBP: 0000000000001000   R8: 0000000000000001   R9: 0000000000003003
    R10: 00007ffcafbed4e0  R11: 0000000000000246  R12: 0000000000001000
    R13: 0000000000912000  R14: 0000000000913000  R15: 0000000000000000
    ORIG_RAX: 0000000000000001  CS: 0033  SS: 002b
crash&amp;gt; 
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;And last Lustre debug traces for PID 100699 clearly show that it has changed from CPU #0 to #18 during its execution within osc_lock_upcall() context, and particularly in the __ldlm_handle2lock()-&amp;gt;lu_ref_add() sequence because of might_sleep() !! :&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00000008:00000001:0.0:1582104656.226704:0:100699:0:(osc_lock.c:274:osc_lock_upcall()) Process entered
00010000:00000001:0.0:1582104656.226704:0:100699:0:(ldlm_lock.c:599:__ldlm_handle2lock()) Process entered
00000020:00000001:0.0:1582104656.226705:0:100699:0:(lustre_handles.c:141:class_handle2object()) Process entered
00000020:00000040:0.0:1582104656.226705:0:100699:0:(lustre_handles.c:159:class_handle2object()) GET ldlm ffff99fd2cf91680 refcount=8
00000020:00000001:0.0:1582104656.226706:0:100699:0:(lustre_handles.c:166:class_handle2object()) Process leaving (rc=18446631911393138304 : -112162316413312 : ffff99fd2cf91680)
00000020:00000010:0.0:1582104656.226707:0:100699:0:(lu_ref.c:157:lu_ref_add_context()) slab-alloced &apos;link&apos;: 40 at ffff99fd5e6c2e88.
00010000:00000001:0.0:1582104656.226708:0:100699:0:(ldlm_lock.c:621:__ldlm_handle2lock()) Process leaving (rc=18446631911393138304 : -112162316413312 : ffff99fd2cf91680)
00000020:00000010:0.0:1582104656.226709:0:100699:0:(lu_ref.c:255:lu_ref_del()) slab-freed &apos;link&apos;: 40 at ffff99fd5e6c2b90.
00000020:00000010:0.0:1582104656.226710:0:100699:0:(lu_ref.c:157:lu_ref_add_context()) slab-alloced &apos;link&apos;: 40 at ffff99fd5e6c2b90.
00010000:00000001:0.0:1582104656.226711:0:100699:0:(ldlm_lock.c:599:__ldlm_handle2lock()) Process entered
00000020:00000001:0.0:1582104656.226711:0:100699:0:(lustre_handles.c:141:class_handle2object()) Process entered
00000020:00000040:0.0:1582104656.226712:0:100699:0:(lustre_handles.c:159:class_handle2object()) GET ldlm ffff99fd2cf91680 refcount=9
00000020:00000001:0.0:1582104656.226712:0:100699:0:(lustre_handles.c:166:class_handle2object()) Process leaving (rc=18446631911393138304 : -112162316413312 : ffff99fd2cf91680)
00000020:00000010:18.0:1582104656.226768:0:100699:0:(lu_ref.c:157:lu_ref_add_context()) slab-alloced &apos;link&apos;: 40 at ffff99fda6ecf320.
00010000:00000001:18.0:1582104656.226773:0:100699:0:(ldlm_lock.c:621:__ldlm_handle2lock()) Process leaving (rc=18446631911393138304 : -112162316413312 : ffff99fd2cf91680)
00010000:00000001:18.0:1582104656.226777:0:100699:0:(ldlm_lock.c:289:ldlm_lock_remove_from_lru_check()) Process entered
00010000:00000001:18.0:1582104656.226779:0:100699:0:(ldlm_lock.c:301:ldlm_lock_remove_from_lru_check()) Process leaving (rc=0 : 0 : 0)
00000020:00000010:18.0:1582104656.226781:0:100699:0:(lu_ref.c:157:lu_ref_add_context()) slab-alloced &apos;link&apos;: 40 at ffff99fda6ecf230.
00000020:00000010:18.0:1582104656.226783:0:100699:0:(lu_ref.c:157:lu_ref_add_context()) slab-alloced &apos;link&apos;: 40 at ffff99fda6ecf618.
00010000:00010000:18.0:1582104656.226785:0:100699:0:(ldlm_lock.c:767:ldlm_lock_addref_internal_nolock()) ### ldlm_lock_addref(PW) ns: ?? lock: ffff99fd2cf91680/0x11f7c8e35cf3bf6 lrc: 10/0,2 mode: PW/PW res: ?? rrc=?? type: ??? flags: 0x810020080000000 nid: local remote: 0x11f7c8e35cf3bfd expref: -99 pid: 100699 timeout: 0 lvb_type: 1
00000020:00000010:18.0:1582104656.226794:0:100699:0:(lu_ref.c:255:lu_ref_del()) slab-freed &apos;link&apos;: 40 at ffff99fd5e6c2e88.
00010000:00000001:18.0:1582104656.226796:0:100699:0:(ldlm_lock.c:209:ldlm_lock_put()) Process entered
00010000:00000001:18.0:1582104656.226797:0:100699:0:(ldlm_lock.c:249:ldlm_lock_put()) Process leaving
00000020:00040000:18.0:1582104656.226799:0:100699:0:(cl_object.c:962:cl_env_percpu_put()) ASSERTION( cle == &amp;amp;cl_env_percpu[cpu] ) failed: 
00000020:00040000:18.0:1582104656.242030:0:100699:0:(cl_object.c:962:cl_env_percpu_put()) LBUG
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;So all of this clearly indicates that all lu_ref related work within osc_lock_upcall() context needs to be done atomically and without scheduling/sleeping, and particularly in __ldlm_handle2lock() and osc_lock_granted() where lu_ref_add_atomic() should be used instead of lu_ref_add() (which calls might_sleep() and is likely to cause a scheduling/cpu-switch...).&lt;/p&gt;

&lt;p&gt;Will push a patch soon to address and tentatively fix these cases.&lt;/p&gt;</description>
                <environment></environment>
        <key id="58124">LU-13264</key>
            <summary>Multiple &quot;(cl_object.c:962:cl_env_percpu_put()) ASSERTION( cle == &amp;cl_env_percpu[cpu] ) failed&quot; cases triggered by cl_env_percpu_put() in osc_lock_upcall() context, with recent master configured with USE_LU_REF defined</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="bruno">Bruno Faccini</assignee>
                                    <reporter username="bruno">Bruno Faccini</reporter>
                        <labels>
                    </labels>
                <created>Wed, 19 Feb 2020 16:06:36 +0000</created>
                <updated>Tue, 25 Feb 2020 14:00:34 +0000</updated>
                            <resolved>Tue, 25 Feb 2020 14:00:34 +0000</resolved>
                                                    <fixVersion>Lustre 2.14.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>2</watches>
                                                                            <comments>
                            <comment id="263585" author="gerrit" created="Wed, 19 Feb 2020 16:27:34 +0000"  >&lt;p&gt;Faccini Bruno (bruno.faccini@intel.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/37629&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/37629&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13264&quot; title=&quot;Multiple &amp;quot;(cl_object.c:962:cl_env_percpu_put()) ASSERTION( cle == &amp;amp;cl_env_percpu[cpu] ) failed&amp;quot; cases triggered by cl_env_percpu_put() in osc_lock_upcall() context, with recent master configured with USE_LU_REF defined&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13264&quot;&gt;&lt;del&gt;LU-13264&lt;/del&gt;&lt;/a&gt; osc: ensure lu_ref work atomic from osc_lock_upcall()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: c9dde7011a2bfe6496088d78759f57607966d2f6&lt;/p&gt;</comment>
                            <comment id="263974" author="gerrit" created="Tue, 25 Feb 2020 05:52:16 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/37629/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/37629/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-13264&quot; title=&quot;Multiple &amp;quot;(cl_object.c:962:cl_env_percpu_put()) ASSERTION( cle == &amp;amp;cl_env_percpu[cpu] ) failed&amp;quot; cases triggered by cl_env_percpu_put() in osc_lock_upcall() context, with recent master configured with USE_LU_REF defined&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-13264&quot;&gt;&lt;del&gt;LU-13264&lt;/del&gt;&lt;/a&gt; osc: ensure lu_ref work atomic from osc_lock_upcall()&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 1951f4dcd29dc7fac74b435c3fb84cb6a95328e7&lt;/p&gt;</comment>
                            <comment id="264018" author="pjones" created="Tue, 25 Feb 2020 14:00:34 +0000"  >&lt;p&gt;Landed for 2.14&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i00tyv:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>