Details
-
Bug
-
Resolution: Fixed
-
Minor
-
None
-
None
-
3
-
9223372036854775807
Description
The panic stack is :
[1001604.771625] BUG: unable to handle kernel NULL pointer dereference at 0000000000000088 [1001605.163344] IP: [<ffffffffa1314227>] ll_statahead_thread+0x107/0x2860 [lustre] [1001605.525225] PGD 0 [1001605.631331] Oops: 0002 [#1] SMP [1001605.797101] Modules linked in: fuse osc(OE) mgc(OE) lustre(OE) lmv(OE) fld(OE) mdc(OE) fid(OE) lov(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) sha512_ssse3 sha512_generic cts rpcsec_gss_krb5 nfsv4 crypto_null libcfs(OE) dns_resolver rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_uverbs(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlx4_en(OE) ipt_REJECT nf_reject_ipv4 nf_log_ipv4 nf_log_common xt_LOG nf_conntrack_ipv4 nf_defrag_ipv4 xt_multiport xt_recent xt_conntrack nf_conntrack iptable_filter dm_mirror mlx4_ib(OE) dm_region_hash dm_log dm_mod ib_sa(OE) ib_mad(OE) ib_core(OE) ib_addr(OE) ib_netlink(OE) intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel mgag200 lrw gf128mul ttm glue_helper ablk_helper [1001609.295329] cryptd drm_kms_helper mlx4_core(OE) syscopyarea sysfillrect sysimgblt fb_sys_fops drm iTCO_wdt mlx_compat(OE) iTCO_vendor_support ses hpwdt enclosure sb_edac pcspkr lpc_ich ipmi_devintf edac_core i2c_i801 shpchp acpi_power_meter wmi fjes ipmi_si ipmi_msghandler pcc_cpufreq nfsd binfmt_misc auth_rpcgss ip_tables bridge stp llc xfs isci libsas qla3xxx e1000e igb i2c_algo_bit i2c_core megaraid_sas aacraid aic79xx ata_piix mpt2sas raid_class mptspi scsi_transport_spi mptsas mptscsih mptbase arcmsr ahci libahci sata_nv sata_svw bnx2x libcrc32c bnx2 ext4 mbcache jbd2 sata_sil libata tg3 e1000 nfsv3 nfs_acl nfs lockd grace sunrpc fscache tun sd_mod crc_t10dif crct10dif_generic sg crct10dif_pclmul crct10dif_common ixgbe dca mdio hpilo hpsa ptp scsi_transport_sas pps_core [last unloaded: ipmi_msghandler] [1001612.730562] [1001612.739169] CPU: 1 PID: 22492 Comm: ll_sa_22485 Tainted: G OE ------------ 3.10.0-514.21.2.el7.x86_64 #1 [1001613.265741] Hardware name: HP ProLiant BL460c Gen9, BIOS I36 09/12/2016 [1001613.596748] task: ffff882bf63c5e20 ti: ffff882db9748000 task.ti: ffff882db9748000 [1001613.970335] RIP: 0010:[<ffffffffa1314227>] [<ffffffffa1314227>] ll_statahead_thread+0x107/0x2860 [lustre] [1001614.450601] RSP: 0018:ffff882db974bd20 EFLAGS: 00010246 [1001614.718782] RAX: 00000000000057dc RBX: 0000000000000000 RCX: 0000000000000000 [1001615.074787] RDX: 0000000000000046 RSI: 0000000000000000 RDI: ffff882e68eeeca8 [1001615.431144] RBP: ffff882db974bec0 R08: ffff882db9748000 R09: 0000000000000000 [1001615.790666] R10: 0000000000000001 R11: 0000000000000000 R12: ffff8822d1bc4f00 [1001616.150001] R13: ffffffffa1314120 R14: 0000000000000000 R15: 0000000000000000 [1001616.511851] FS: 0000000000000000(0000) GS:ffff881fffa40000(0000) knlGS:0000000000000000 [1001616.916485] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1001617.206037] CR2: 0000000000000088 CR3: 00000000019be000 CR4: 00000000003407e0 [1001617.564110] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1001617.921246] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1001618.281884] Stack: [1001618.391733] 0000000000000001 0000000000000014 ffff881b99375e20 ffff882db974bd78 [1001618.762956] ffffffff810ce4c4 00059af4b94ae14e ffff882db974bd78 ffffffff810caebe [1001619.134595] ffff883fb0bfc000 ffff882e68eeeca8 0000000000000001 ffff882db974bdc8 [1001619.502128] Call Trace: [1001619.632070] [<ffffffff810ce4c4>] ? update_curr+0x104/0x190 [1001619.913794] [<ffffffff810caebe>] ? account_entity_dequeue+0xae/0xd0 [1001620.233652] [<ffffffff810ce9ac>] ? dequeue_entity+0x11c/0x5d0 [1001620.531744] [<ffffffff810c1f55>] ? check_preempt_curr+0x75/0xa0 [1001620.835630] [<ffffffff810cf27e>] ? dequeue_task_fair+0x41e/0x660 [1001621.143820] [<ffffffff810c8395>] ? sched_clock_cpu+0x85/0xc0 [1001621.435359] [<ffffffff81029569>] ? __switch_to+0xd9/0x4c0 [1001621.713460] [<ffffffff8168c130>] ? __schedule+0x3b0/0x990 [1001621.992373] [<ffffffffa1314120>] ? ll_agl_thread+0x4d0/0x4d0 [lustre] [1001622.322281] [<ffffffff810b0a4f>] kthread+0xcf/0xe0 [1001622.572155] [<ffffffff810b0980>] ? kthread_create_on_node+0x140/0x140 [1001622.902516] [<ffffffff81697698>] ret_from_fork+0x58/0x90 [1001623.176213] [<ffffffff810b0980>] ? kthread_create_on_node+0x140/0x140 [1001623.505432] Code: 48 85 db 74 04 f0 ff 43 08 48 8b bd a8 fe ff ff e8 bf a6 37 e0 65 48 8b 04 25 c0 cd 00 00 f6 05 3d 82 97 ff 40 8b 80 a4 04 00 00 <89> 83 88 00 00 00 74 6e f6 05 2a 82 97 ff 80 74 65 48 8b 85 f8 [1001624.452047] RIP [<ffffffffa1314227>] ll_statahead_thread+0x107/0x2860 [lustre] [1001624.822254] RSP <ffff882db974bd20> [1001625.002284] CR2: 0000000000000088
My crash dump analysis indicates that the crash occurs because statahead newly started thread finds lli_sai == NULL when it is not expected/checked in associated code :
// FIXME: replace the name of this with LL_I to conform to kernel stuff // static inline struct ll_inode_info *LL_I(struct inode *inode) static inline struct ll_inode_info *ll_i2info(struct inode *inode) { return container_of(inode, struct ll_inode_info, lli_vfs_inode); } /* * take refcount of sai if sai for @dir exists, which means statahead is on for * this directory. */ static inline struct ll_statahead_info *ll_sai_get(struct inode *dir) { struct ll_inode_info *lli = ll_i2info(dir); struct ll_statahead_info *sai = NULL; spin_lock(&lli->lli_sa_lock); sai = lli->lli_sai; if (sai != NULL) atomic_inc(&sai->sai_refcount); spin_unlock(&lli->lli_sa_lock); return sai; <<<<< sai can be returned NULL !!!! } /* statahead thread main function */ static int ll_statahead_thread(void *arg) { struct dentry *parent = (struct dentry *)arg; struct inode *dir = parent->d_inode; struct ll_inode_info *lli = ll_i2info(dir); struct ll_sb_info *sbi = ll_i2sbi(dir); struct ll_statahead_info *sai; struct ptlrpc_thread *sa_thread; struct ptlrpc_thread *agl_thread; int first = 0; struct md_op_data *op_data; struct ll_dir_chain chain; struct l_wait_info lwi = { 0 }; struct page *page = NULL; __u64 pos = 0; int rc = 0; ENTRY; sai = ll_sai_get(dir); <<<<<<<<<<<<<<< sai can be returned NULL !!!!! sa_thread = &sai->sai_thread; agl_thread = &sai->sai_agl_thread; sa_thread->t_pid = current_pid(); <<<<<<<<<<<<<<<<<<< BUG() …………………..
Further crash-dump analysis and concerned source code reading has pointed me to a racy scenario where multiple+concurrent attempts to start statahead thread for the same dir has occurred which can lead to this BUG(), due to the fact start_statahead_thread() routine unconditionally clearing lli_sai upon error (i.e., even if already/previously set by an other starter for ll_statahead_thread!) :
static int start_statahead_thread(struct inode *dir, struct dentry *dentry) { struct ll_inode_info *lli = ll_i2info(dir); struct ll_statahead_info *sai = NULL; struct dentry *parent = dentry->d_parent; struct ptlrpc_thread *thread; struct l_wait_info lwi = { 0 }; struct task_struct *task; int rc; ENTRY; /* I am the "lli_opendir_pid" owner, only me can set "lli_sai". */ rc = is_first_dirent(dir, dentry); if (rc == LS_NOT_FIRST_DE) /* It is not "ls -{a}l" operation, no need statahead for it. */ GOTO(out, rc = -EFAULT); <<<<<<<<<<<<<<<<<<<<<<<<< sai = ll_sai_alloc(parent); if (sai == NULL) GOTO(out, rc = -ENOMEM); <<<<<<<<<<<<<<<<<<<<<<<< sai->sai_ls_all = (rc == LS_FIRST_DOT_DE); /* if current lli_opendir_key was deauthorized, or dir re-opened by * another process, don't start statahead, otherwise the newly spawned * statahead thread won't be notified to quit. */ spin_lock(&lli->lli_sa_lock); if (unlikely(lli->lli_sai != NULL || <<<<<<<<<<<<<<<<<<<<<<< lli->lli_opendir_key == NULL || lli->lli_opendir_pid != current->pid)) { spin_unlock(&lli->lli_sa_lock); GOTO(out, rc = -EPERM); } lli->lli_sai = sai; spin_unlock(&lli->lli_sa_lock); .......................... out: /* once we start statahead thread failed, disable statahead so that * subsequent stat won't waste time to try it. */ spin_lock(&lli->lli_sa_lock); lli->lli_sa_enabled = 0; lli->lli_sai = NULL; <<<<<<<<<<<<<<<<<<<<<<<<< spin_unlock(&lli->lli_sa_lock); if (sai != NULL) ll_sai_free(sai); RETURN(rc); }
So my first guess is that this problem can be fixed simply by checking if (lli->lli_sai == said) before to clear/NULL lli->lli_sai in start_statahead_thread(), I will push a patch soon to implement this.