Details
-
Bug
-
Resolution: Fixed
-
Minor
-
Lustre 2.15.0
-
None
-
3
-
9223372036854775807
Description
Gerrit Janitor testing with lockdep enabled reports a potential lock inversion problem on the client console log during runtests:
http://testing.linuxhacker.ru:3333/lustre-reports/22787/results.html
http://testing.linuxhacker.ru:3333/lustre-reports/22787/testresults/runtests-rhel8-zfs-rhel8.5_x86_64-rhel8.5_x86_64/
[ 272.552541] [ 272.553139] ======================================================== [ 272.554212] WARNING: possible irq lock inversion dependency detected [ 272.555211] 4.18.0rh8.5-debug #2 Tainted: G O --------- - - [ 272.556374] -------------------------------------------------------- [ 272.557413] socknal_sd01_01/2126 just changed the state of lock: [ 272.558402] ffffffffa0593070 (&ksocknal_data.ksnd_global_lock){++.?}-{2:2}, at: ksocknal_data_ready+0x2b/0x110 [ksocklnd] [ 272.560260] but this lock took another, SOFTIRQ-unsafe lock in the past: [ 272.561416] (&tcd->tcd_lock){+.+.}-{2:2} [ 272.561419] and interrupts could create inverse lock ordering between them. [ 272.564283] [ 272.564283] other info that might help us debug this: [ 272.565401] Possible interrupt unsafe locking scenario: [ 272.565401] [ 272.566542] CPU0 CPU1 [ 272.567437] ---- ---- [ 272.568273] lock(&tcd->tcd_lock); [ 272.568819] local_irq_disable(); [ 272.570068] lock(&ksocknal_data.ksnd_global_lock); [ 272.571353] lock(&tcd->tcd_lock); [ 272.572443] <Interrupt> [ 272.573004] lock(&ksocknal_data.ksnd_global_lock); [ 272.573874] [ 272.573874] *** DEADLOCK *** [ 272.573874] [ 272.574864] 5 locks held by socknal_sd01_01/2126: [ 272.575750] #0: ffff888118da5918 (&keys[i]){+.+.}-{2:2}, at: cfs_percpt_lock+0xaa/0x170 [libcfs] [ 272.577328] #1: ffff888118da5a18 (&keys[i]#2){+.+.}-{2:2}, at: cfs_percpt_lock+0xaa/0x170 [libcfs] [ 272.579057] #2: ffffffff82f2e400 (rcu_read_lock){....}-{1:2}, at: netif_receive_skb_internal+0x4d/0x2d0 [ 272.581396] #3: ffffffff82f2e400 (rcu_read_lock){....}-{1:2}, at: ip_local_deliver_finish+0x35/0x210 [ 272.583011] #4: ffff88810a1b4420 (k-slock-AF_INET/1){+.-.}-{2:2}, at: tcp_v4_rcv+0x1609/0x1990 [ 272.584439] [ 272.584439] the shortest dependencies between 2nd lock and 1st lock: [ 272.585764] -> (&tcd->tcd_lock){+.+.}-{2:2} ops: 1335 { [ 272.586649] HARDIRQ-ON-W at: [ 272.587201] lock_acquire+0x16a/0x6f0 [ 272.588169] _raw_spin_lock+0x50/0xd0 [ 272.589097] cfs_trace_lock_tcd+0x2f/0xc0 [libcfs] [ 272.590182] libcfs_debug_msg+0x13f/0xf70 [libcfs] [ 272.591289] cfs_cpu_init+0x17b/0x3b0 [libcfs] [ 272.592324] seq_client_rpc+0xbc/0x760 [fid] [ 272.593302] do_one_initcall+0xbe/0x620 [ 272.594279] do_init_module+0xaa/0x350 [ 272.595190] load_module+0x19c9/0x1d50 [ 272.596181] __do_sys_finit_module+0x138/0x190 [ 272.597185] __x64_sys_finit_module+0x1e/0x30 [ 272.598138] do_syscall_64+0xd4/0x5a0 [ 272.599040] entry_SYSCALL_64_after_hwframe+0x6a/0xdf [ 272.600301] SOFTIRQ-ON-W at: [ 272.600895] lock_acquire+0x16a/0x6f0 [ 272.601875] _raw_spin_lock+0x50/0xd0 [ 272.602815] cfs_trace_lock_tcd+0x2f/0xc0 [libcfs] [ 272.604134] libcfs_debug_msg+0x13f/0xf70 [libcfs] [ 272.605273] cfs_cpu_init+0x17b/0x3b0 [libcfs] [ 272.606380] seq_client_rpc+0xbc/0x760 [fid] [ 272.607438] do_one_initcall+0xbe/0x620 [ 272.608545] do_init_module+0xaa/0x350 [ 272.609478] load_module+0x19c9/0x1d50 [ 272.610396] __do_sys_finit_module+0x138/0x190 [ 272.611412] __x64_sys_finit_module+0x1e/0x30 [ 272.612456] do_syscall_64+0xd4/0x5a0 [ 272.613387] entry_SYSCALL_64_after_hwframe+0x6a/0xdf [ 272.614497] INITIAL USE at: [ 272.615027] lock_acquire+0x16a/0x6f0 [ 272.615906] _raw_spin_lock+0x50/0xd0 [ 272.616941] cfs_trace_lock_tcd+0x2f/0xc0 [libcfs] [ 272.618009] libcfs_debug_msg+0x13f/0xf70 [libcfs] [ 272.619127] cfs_cpu_init+0x17b/0x3b0 [libcfs] [ 272.620271] seq_client_rpc+0xbc/0x760 [fid] [ 272.621196] do_one_initcall+0xbe/0x620 [ 272.622144] do_init_module+0xaa/0x350 [ 272.623073] load_module+0x19c9/0x1d50 [ 272.624015] __do_sys_finit_module+0x138/0x190 [ 272.625051] __x64_sys_finit_module+0x1e/0x30 [ 272.626041] do_syscall_64+0xd4/0x5a0 [ 272.626935] entry_SYSCALL_64_after_hwframe+0x6a/0xdf [ 272.628429] } [ 272.628820] ... key at: [<ffffffffa0436360>] __key.50710+0x0/0xfffffffffffdcca0 [libcfs] [ 272.630665] ... acquired at: [ 272.631366] __lock_acquire+0x67f/0xe40 [ 272.632312] lock_acquire+0x16a/0x6f0 [ 272.633164] _raw_spin_lock+0x50/0xd0 [ 272.633964] cfs_trace_lock_tcd+0x2f/0xc0 [libcfs] [ 272.634965] libcfs_debug_msg+0x13f/0xf70 [libcfs] [ 272.635974] ksocknal_find_peer_locked+0x261/0x270 [ksocklnd] [ 272.637086] ksocknal_launch_packet+0x67/0x660 [ksocklnd] [ 272.638097] ksocknal_send+0x15e/0x3d0 [ksocklnd] [ 272.639004] lnet_ni_send+0x68/0x150 [lnet] [ 272.639793] lnet_send+0xc5/0x270 [lnet] [ 272.640594] LNetGet+0x50b/0xc60 [lnet] [ 272.641361] lnet_send_ping+0x143/0x2b0 [lnet] [ 272.642299] lnet_peer_send_ping+0xdc/0x320 [lnet] [ 272.643382] lnet_peer_discovery+0x163f/0x2260 [lnet] [ 272.644438] kthread+0x1ae/0x1e0 [ 272.645224] ret_from_fork+0x24/0x50 [ 272.646083] [ 272.646335] -> (&ksocknal_data.ksnd_global_lock){++.?}-{2:2} ops: 4782 { [ 272.647517] HARDIRQ-ON-W at: [ 272.648125] lock_acquire+0x16a/0x6f0 [ 272.649214] _raw_write_lock_bh+0x60/0xe0 [ 272.651188] ksocknal_launch_packet+0xb4/0x660 [ksocklnd] [ 272.653816] ksocknal_send+0x15e/0x3d0 [ksocklnd] [ 272.655566] lnet_ni_send+0x68/0x150 [lnet] [ 272.657191] lnet_send+0xc5/0x270 [lnet] [ 272.659140] LNetGet+0x50b/0xc60 [lnet] [ 272.661052] lnet_send_ping+0x143/0x2b0 [lnet] [ 272.662410] lnet_peer_send_ping+0xdc/0x320 [lnet] [ 272.663597] lnet_peer_discovery+0x163f/0x2260 [lnet] [ 272.664725] kthread+0x1ae/0x1e0 [ 272.665555] ret_from_fork+0x24/0x50 [ 272.666573] HARDIRQ-ON-R at: [ 272.667113] lock_acquire+0x16a/0x6f0 [ 272.667938] _raw_read_lock+0xd7/0x120 [ 272.668797] ksocknal_check_peer_timeouts+0x45/0xd70 [ksocklnd] [ 272.670193] ksocknal_reaper+0x4b8/0x6c0 [ksocklnd] [ 272.671270] kthread+0x1ae/0x1e0 [ 272.672099] ret_from_fork+0x24/0x50 [ 272.673034] IN-SOFTIRQ-R at: [ 272.673560] lock_acquire+0x16a/0x6f0 [ 272.674375] _raw_read_lock+0x64/0x120 [ 272.675268] ksocknal_data_ready+0x2b/0x110 [ksocklnd] [ 272.676414] tcp_data_ready+0x2f/0x1e0 [ 272.677336] tcp_data_queue+0x47a/0xb30 [ 272.678261] tcp_rcv_established+0x2d1/0xb90 [ 272.679280] tcp_v4_do_rcv+0x185/0x320 [ 272.680522] tcp_v4_rcv+0x169d/0x1990 [ 272.681552] ip_protocol_deliver_rcu+0x47/0x670 [ 272.682994] ip_local_deliver_finish+0xce/0x210 [ 272.684640] ip_local_deliver+0xcd/0x380 [ 272.685698] ip_rcv_finish+0x39e/0xbf0 [ 272.686942] ip_rcv+0x332/0x733 [ 272.688142] __netif_receive_skb_core+0x1503/0x1790 [ 272.689282] __netif_receive_skb+0x2b/0x90 [ 272.690932] netif_receive_skb_internal+0xd0/0x2d0 [ 272.692038] napi_gro_receive+0x25c/0x350 [ 272.693237] receive_buf+0x2aa/0x7f0 [ 272.694635] virtnet_poll+0x245/0x498 [ 272.695680] __napi_poll+0x41/0x3c0 [ 272.696725] net_rx_action+0x3cf/0x530 [ 272.697661] __do_softirq+0x139/0x879 [ 272.698689] irq_exit+0x28f/0x2d0 [ 272.699706] smp_apic_timer_interrupt+0x17c/0x530 [ 272.701015] apic_timer_interrupt+0xf/0x20 [ 272.702049] lock_is_held_type+0x61/0x230 [ 272.703010] rcu_read_lock_sched_held+0x6c/0xc0 [ 272.704188] lock_acquired+0x602/0x660 [ 272.705250] _raw_spin_lock+0x82/0xd0 [ 272.706196] cfs_percpt_lock+0xaa/0x170 [libcfs] [ 272.707331] lnet_discovery_event_handler+0x25b/0x800 [lnet] [ 272.708717] lnet_finalize+0xa28/0xe60 [lnet] [ 272.709737] ksocknal_tx_done+0xc1/0x190 [ksocklnd] [ 272.711087] ksocknal_scheduler+0x938/0x10e0 [ksocklnd] [ 272.712299] kthread+0x1ae/0x1e0 [ 272.713143] ret_from_fork+0x24/0x50 [ 272.714040] SOFTIRQ-ON-R at: [ 272.714634] lock_acquire+0x16a/0x6f0 [ 272.715459] _raw_read_lock+0xd7/0x120 [ 272.716504] ksocknal_check_peer_timeouts+0x45/0xd70 [ksocklnd] [ 272.717729] ksocknal_reaper+0x4b8/0x6c0 [ksocklnd] [ 272.718885] kthread+0x1ae/0x1e0 [ 272.719739] ret_from_fork+0x24/0x50 [ 272.720693] INITIAL USE at: [ 272.721348] lock_acquire+0x16a/0x6f0 [ 272.722329] _raw_write_lock_bh+0x60/0xe0 [ 272.723327] ksocknal_launch_packet+0xb4/0x660 [ksocklnd] [ 272.724560] ksocknal_send+0x15e/0x3d0 [ksocklnd] [ 272.725650] lnet_ni_send+0x68/0x150 [lnet] [ 272.726659] lnet_send+0xc5/0x270 [lnet] [ 272.727623] LNetGet+0x50b/0xc60 [lnet] [ 272.728748] lnet_send_ping+0x143/0x2b0 [lnet] [ 272.729682] lnet_peer_send_ping+0xdc/0x320 [lnet] [ 272.730662] lnet_peer_discovery+0x163f/0x2260 [lnet] [ 272.731812] kthread+0x1ae/0x1e0 [ 272.732731] ret_from_fork+0x24/0x50 [ 272.733550] INITIAL READ USE at: [ 272.734153] lock_acquire+0x16a/0x6f0 [ 272.735183] _raw_read_lock+0xd7/0x120 [ 272.736515] ksocknal_check_peer_timeouts+0x45/0xd70 [ksocklnd] [ 272.737954] ksocknal_reaper+0x4b8/0x6c0 [ksocklnd] [ 272.739108] kthread+0x1ae/0x1e0 [ 272.740205] ret_from_fork+0x24/0x50 [ 272.741169] } [ 272.741498] ... key at: [<ffffffffa0596580>] __key.91535+0x0/0xfffffffffffeca80 [ksocklnd] [ 272.742977] ... acquired at: [ 272.743471] __lock_acquire+0xbbc/0xe40 [ 272.744322] lock_acquire+0x16a/0x6f0 [ 272.744995] _raw_read_lock+0x64/0x120 [ 272.745664] ksocknal_data_ready+0x2b/0x110 [ksocklnd] [ 272.746577] tcp_data_ready+0x2f/0x1e0 [ 272.747298] tcp_data_queue+0x47a/0xb30 [ 272.747983] tcp_rcv_established+0x2d1/0xb90 [ 272.748799] tcp_v4_do_rcv+0x185/0x320 [ 272.749655] tcp_v4_rcv+0x169d/0x1990 [ 272.750325] ip_protocol_deliver_rcu+0x47/0x670 [ 272.751137] ip_local_deliver_finish+0xce/0x210 [ 272.751985] ip_local_deliver+0xcd/0x380 [ 272.752812] ip_rcv_finish+0x39e/0xbf0 [ 272.753494] ip_rcv+0x332/0x733 [ 272.754113] __netif_receive_skb_core+0x1503/0x1790 [ 272.755006] __netif_receive_skb+0x2b/0x90 [ 272.755683] netif_receive_skb_internal+0xd0/0x2d0 [ 272.756696] napi_gro_receive+0x25c/0x350 [ 272.757425] receive_buf+0x2aa/0x7f0 [ 272.758060] virtnet_poll+0x245/0x498 [ 272.758661] __napi_poll+0x41/0x3c0 [ 272.759391] net_rx_action+0x3cf/0x530 [ 272.760385] __do_softirq+0x139/0x879 [ 272.761221] irq_exit+0x28f/0x2d0 [ 272.761790] smp_apic_timer_interrupt+0x17c/0x530 [ 272.763096] apic_timer_interrupt+0xf/0x20 [ 272.763960] lock_is_held_type+0x61/0x230 [ 272.765203] rcu_read_lock_sched_held+0x6c/0xc0 [ 272.766986] lock_acquired+0x602/0x660 [ 272.768470] _raw_spin_lock+0x82/0xd0 [ 272.770022] cfs_percpt_lock+0xaa/0x170 [libcfs] [ 272.771249] lnet_discovery_event_handler+0x25b/0x800 [lnet] [ 272.772483] lnet_finalize+0xa28/0xe60 [lnet] [ 272.773253] ksocknal_tx_done+0xc1/0x190 [ksocklnd] [ 272.774400] ksocknal_scheduler+0x938/0x10e0 [ksocklnd] [ 272.775678] kthread+0x1ae/0x1e0 [ 272.776341] ret_from_fork+0x24/0x50