[LU-16576] [ 281.513748] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 Created: 17/Feb/23  Updated: 17/Feb/23

Status: Open
Project: Lustre
Component/s: None
Affects Version/s: None
Fix Version/s: None

Type: Bug Priority: Minor
Reporter: Chris Horn Assignee: WC Triage
Resolution: Unresolved Votes: 0
Labels: None

Severity: 3
Rank (Obsolete): 9223372036854775807

 Description   

Hit on master at commit eed4d4c752 ("LU-16536 osp: don't cleanup ldlm in precleanup phase"). Reproducer and resulting oops:

[root@el8-mds2 ~]# start.sh
insmod /home/hornc/lustre-wc-rel/libcfs/libcfs/libcfs.ko
insmod /home/hornc/lustre-wc-rel/lnet/lnet/lnet.ko
insmod /home/hornc/lustre-wc-rel/lnet/klnds/socklnd/ksocklnd.ko
debug=+net malloc
[root@el8-mds2 ~]# lnetctl net add --net tcp --if eth2
[root@el8-mds2 ~]# lnetctl net add --net tcp2 --if eth3
[root@el8-mds2 ~]# lnetctl net del --net tcp --if eth3
<oops>
[  281.513748] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
[  281.514297] PGD 8000000002d96067 P4D 8000000002d96067 PUD 55b1067 PMD 0
[  281.514715] Oops: 0000 [#1] SMP PTI
[  281.514979] CPU: 1 PID: 2948 Comm: lt-lnetctl Kdump: loaded Tainted: G           OE    --------- -  - 4.18.0-425.3.1.el8.x86_64 #1
[  281.515672] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
[  281.516076] RIP: 0010:lnet_net_cmd+0x515/0xa00 [lnet]
[  281.516401] Code: 98 c0 48 c7 c6 60 f2 97 c0 48 c7 c7 c0 bc 99 c0 c7 05 c3 f8 05 00 01 00 00 00 49 89 c9 49 89 c8 e8 f0 32 ca ff e9 32 fb ff ff <4c> 8b 3c 25 00 00 00 00 49 39 df 0f 85 1e ff ff ff 49 8b 5d 40 48
[  281.517412] RSP: 0018:ffffa56c8128b640 EFLAGS: 00010286
[  281.517735] RAX: 00000000ffffffff RBX: ffff929b04167410 RCX: 0000000000000032
[  281.518117] RDX: 0000000000000004 RSI: ffffa56c8128b6a8 RDI: ffff929b03392e18
[  281.518493] RBP: 0000000000000030 R08: 0000000000000033 R09: ffffa56c8128b6ac
[  281.518869] R10: ffffa56c8128b664 R11: ffffffffffffffff R12: ffff929b03b68e40
[  281.519245] R13: ffffa56c8128ba50 R14: ffffa56c8128b698 R15: ffff929b06662800
[  281.519622] FS:  00007f7f00e3d380(0000) GS:ffff929b7db00000(0000) knlGS:0000000000000000
[  281.520043] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  281.520363] CR2: 0000000000000000 CR3: 0000000006066001 CR4: 00000000000706e0
[  281.520733] Call Trace:
[  281.520949]  genl_family_rcv_msg_doit.isra.17+0x113/0x150
[  281.521277]  genl_family_rcv_msg+0xb7/0x170
[  281.521547]  ? lnet_dyn_del_net+0x1f0/0x1f0 [lnet]
[  281.521862]  ? lnet_res_container_setup+0x2c0/0x2c0 [lnet]
[  281.522198]  ? lnet_counters_get+0x140/0x140 [lnet]
[  281.522501]  ? lnet_startup_lndnet+0x7b0/0x7b0 [lnet]
[  281.522805]  genl_rcv_msg+0x47/0xa0
[  281.523043]  ? genl_family_rcv_msg+0x170/0x170
[  281.523329]  netlink_rcv_skb+0x4c/0x130
[  281.523597]  genl_rcv+0x24/0x40
[  281.523835]  netlink_unicast+0x19a/0x230
[  281.524102]  netlink_sendmsg+0x204/0x3d0
[  281.524790]  sock_sendmsg+0x50/0x60
[  281.525080]  ____sys_sendmsg+0x1ef/0x250
[  281.525344]  ? copy_msghdr_from_user+0x5c/0x90
[  281.525612]  ___sys_sendmsg+0x7c/0xc0
[  281.525832]  ? __raw_spin_unlock+0x5/0x10
[  281.526047]  ? handle_pte_fault+0x770/0x880
[  281.526375]  ? __handle_mm_fault+0x453/0x6c0
[  281.526632]  __sys_sendmsg+0x57/0xa0
[  281.526871]  do_syscall_64+0x5b/0x1b0
[  281.527115]  entry_SYSCALL_64_after_hwframe+0x61/0xc6
[  281.527395] RIP: 0033:0x7f7effb81928

Generated at Sat Feb 10 03:28:12 UTC 2024 using Jira 9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c.