|
This issue can be reproduced. Here is the console log from the MDS; the server restarted afterwards.
[root@iu-34 ~]# BUG: unable to handle kernel paging request at ffffffffca096da0
IP: [<ffffffff81052814>] update_curr+0x144/0x1f0
PGD 1a87067 PUD 1a88067 PMD 0
Thread overran stack, or stack corrupted
Oops: 0000 [#1] SMP
last sysfs file: /sys/devices/system/cpu/cpu11/cache/index2/shared_cpu_map
CPU 4
Modules linked in: nfs fscache lmv(U) obdfilter(U) ost(U) cmm(U) osd_ldiskfs(U) mdt(U) mdd(U) mds(U) fsfilt_ldiskfs(U) mgs(U) mgc(U) lustre(U) lov(U) osc(U) lquota(U) mdc(U) fid(U) fld(U) ksocklnd(U) ptlrpc(U) obdclass(U) lnet(U) lvfs(U) libcfs(U) ldiskfs(U) jbd2 nfsd lockd nfs_acl auth_rpcgss exportfs autofs4 sunrpc cpufreq_ondemand acpi_cpufreq freq_table mperf ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_addr ipv6 ib_sa ib_mad ib_core igb cdc_ether usbnet mii microcode serio_raw sg i2c_i801 i2c_core iTCO_wdt iTCO_vendor_support ioatdma dca i7core_edac edac_core shpchp ext3 jbd mbcache sd_mod crc_t10dif pata_acpi ata_generic ata_piix dm_mirror dm_region_hash dm_log dm_mod [last unloaded: scsi_wait_scan]
Pid: 6552, comm: nfsd Not tainted 2.6.32-220.7.1.el6_lustre.gf171aad.x86_64 #1 IBM IBM System X iDataPlex dx360 M3 Server [6391AC1]/69Y4782
RIP: 0010:[<ffffffff81052814>] [<ffffffff81052814>] update_curr+0x144/0x1f0
RSP: 0018:ffff880028283db8 EFLAGS: 00010086
RAX: ffff8806767c1500 RBX: 0000000009094000 RCX: ffff88037feed1c0
RDX: 0000000000018b48 RSI: 0000000000000000 RDI: ffff8806767c1538
RBP: ffff880028283de8 R08: ffffffff8160b6a5 R09: 0000000000000000
R10: 0000000000000010 R11: 0000000000000000 R12: ffff880028295fe8
R13: 00000000000f3b00 R14: 00000160054ace79 R15: ffff8806767c1500
FS: 0000000000000000(0000) GS:ffff880028280000(0000) knlGS:0000000000000000
CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: ffffffffca096da0 CR3: 00000006767ad000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process nfsd (pid: 6552, threadinfo ffff8806768f2000, task ffff8806767c1500)
Stack:
ffff880028283dc8 ffffffff81013743 ffff8806767c1538 ffff880028295fe8
<0> 0000000000000000 0000000000000000 ffff880028283e18 ffffffff81052e2b
<0> ffff880028295f80 0000000000000004 0000000000015f80 0000000000000004
Call Trace:
<IRQ>
[<ffffffff81013743>] ? native_sched_clock+0x13/0x80
[<ffffffff81052e2b>] task_tick_fair+0xdb/0x160
[<ffffffff81056891>] scheduler_tick+0xc1/0x260
[<ffffffff810a0b20>] ? tick_sched_timer+0x0/0xc0
[<ffffffff8107c1c2>] update_process_times+0x52/0x70
[<ffffffff810a0b86>] tick_sched_timer+0x66/0xc0
[<ffffffff8109520e>] __run_hrtimer+0x8e/0x1a0
[<ffffffff81012b59>] ? read_tsc+0x9/0x20
[<ffffffff810955b6>] hrtimer_interrupt+0xe6/0x250
[<ffffffff814f51eb>] smp_apic_timer_interrupt+0x6b/0x9b
[<ffffffff8100bc13>] apic_timer_interrupt+0x13/0x20
<EOI>
[<ffffffff812724a0>] ? strrchr+0x10/0x40
[<ffffffffa0433254>] libcfs_debug_vmsg2+0x84/0xb50 [libcfs]
[<ffffffffa080a403>] ? osc_teardown_async_page+0x183/0x450 [osc]
[<ffffffffa0822d2b>] ? osc_page_delete+0x19b/0x240 [osc]
[<ffffffffa05444c8>] cl_page_delete0+0x138/0x400 [obdclass]
[<ffffffffa05447cd>] cl_page_delete+0x3d/0xf0 [obdclass]
[<ffffffffa091370b>] ll_releasepage+0x10b/0x150 [lustre]
[<ffffffff81168ba0>] ? mem_cgroup_uncharge_cache_page+0x10/0x20
[<ffffffff8110fe10>] try_to_release_page+0x30/0x60
[<ffffffff8112a251>] shrink_page_list.clone.0+0x4f1/0x5c0
[<ffffffff812731ce>] ? number+0x2ee/0x320
[<ffffffff8112a61b>] shrink_inactive_list+0x2fb/0x740
[<ffffffff81272edc>] ? put_dec+0x10c/0x110
[<ffffffff812731ce>] ? number+0x2ee/0x320
[<ffffffff81012b59>] ? read_tsc+0x9/0x20
[<ffffffff8112b32f>] shrink_zone+0x38f/0x520
[<ffffffff8112c0d4>] zone_reclaim+0x354/0x410
[<ffffffff8112cd20>] ? isolate_pages_global+0x0/0x350
[<ffffffff811225d4>] get_page_from_freelist+0x694/0x820
[<ffffffff8127464d>] ? pointer+0xad/0xa60
[<ffffffff81123851>] __alloc_pages_nodemask+0x111/0x940
[<ffffffff812756c6>] ? vsnprintf+0x2b6/0x5f0
[<ffffffff8109b40a>] ? do_gettimeofday+0x1a/0x50
[<ffffffff81012b59>] ? read_tsc+0x9/0x20
[<ffffffff8115dd72>] kmem_getpages+0x62/0x170
[<ffffffff8115e3df>] cache_grow+0x2cf/0x320
[<ffffffff8115e632>] cache_alloc_refill+0x202/0x240
[<ffffffffa0429a13>] ? cfs_alloc+0x63/0x90 [libcfs]
[<ffffffff8115f359>] __kmalloc+0x1a9/0x220
[<ffffffffa0429a13>] cfs_alloc+0x63/0x90 [libcfs]
[<ffffffffa062177a>] ptlrpc_prep_bulk_imp+0x7a/0x350 [ptlrpc]
[<ffffffffa0630d9c>] ? lustre_msg_set_timeout+0x9c/0x110 [ptlrpc]
[<ffffffffa08113df>] osc_brw_prep_request+0x7cf/0x1030 [osc]
[<ffffffffa08269bb>] ? osc_req_attr_set+0xfb/0x2a0 [osc]
[<ffffffffa054e3b8>] ? cl_req_prep+0x108/0x190 [obdclass]
[<ffffffffa0812dd5>] osc_send_oap_rpc+0x1195/0x1c20 [osc]
[<ffffffffa0813b3e>] osc_check_rpcs+0x2de/0x470 [osc]
[<ffffffffa080a143>] ? on_list+0x43/0x50 [osc]
[<ffffffffa08146e3>] osc_queue_async_io+0x3c3/0x8f0 [osc]
[<ffffffffa0544131>] ? cl_page_get+0xa1/0x120 [obdclass]
[<ffffffffa08225df>] osc_page_cache_add+0xcf/0x200 [osc]
[<ffffffffa05425a8>] cl_page_invoke+0xb8/0x160 [obdclass]
[<ffffffffa05435b8>] cl_page_cache_add+0x58/0x240 [obdclass]
[<ffffffffa08df96c>] ? vvp_write_pending+0xcc/0x150 [lustre]
[<ffffffffa0924ad3>] vvp_io_commit_write+0x343/0x5a0 [lustre]
[<ffffffffa0542b0c>] ? cl_page_at_trusted+0x12c/0x1d0 [obdclass]
[<ffffffffa05514af>] cl_io_commit_write+0xaf/0x1e0 [obdclass]
[<ffffffffa05429ae>] ? cl_page_is_owned+0xee/0x120 [obdclass]
[<ffffffffa08fbfad>] ll_commit_write+0xed/0x300 [lustre]
[<ffffffffa0913780>] ll_write_end+0x30/0x60 [lustre]
[<ffffffff81111424>] generic_file_buffered_write+0x174/0x2a0
[<ffffffff81070637>] ? current_fs_time+0x27/0x30
[<ffffffff81112d10>] __generic_file_aio_write+0x250/0x480
[<ffffffff81112faf>] generic_file_aio_write+0x6f/0xe0
[<ffffffffa09253e1>] vvp_io_write_start+0xa1/0x270 [lustre]
[<ffffffffa054dc88>] cl_io_start+0x68/0x170 [obdclass]
[<ffffffffa0552800>] cl_io_loop+0x110/0x1c0 [obdclass]
[<ffffffffa08cc99b>] ll_file_io_generic+0x44b/0x580 [lustre]
[<ffffffffa043ae2b>] ? cfs_hash_add_unique+0x1b/0x40 [libcfs]
[<ffffffffa05419ce>] ? cl_env_get+0x19e/0x350 [obdclass]
[<ffffffffa08ccc0f>] ll_file_aio_write+0x13f/0x310 [lustre]
[<ffffffffa08ccad0>] ? ll_file_aio_write+0x0/0x310 [lustre]
[<ffffffff8117614b>] do_sync_readv_writev+0xfb/0x140
[<ffffffff81090a90>] ? autoremove_wake_function+0x0/0x40
[<ffffffff8120c216>] ? security_file_permission+0x16/0x20
[<ffffffff811771df>] do_readv_writev+0xcf/0x1f0
[<ffffffff81177346>] vfs_writev+0x46/0x60
[<ffffffffa03353d5>] nfsd_vfs_write+0x105/0x430 [nfsd]
[<ffffffff81174052>] ? dentry_open+0x52/0xc0
[<ffffffffa0337a4e>] ? nfsd_open+0x13e/0x210 [nfsd]
[<ffffffffa0337ed7>] nfsd_write+0xe7/0x100 [nfsd]
[<ffffffffa033f7ef>] nfsd3_proc_write+0xaf/0x140 [nfsd]
[<ffffffffa033043e>] nfsd_dispatch+0xfe/0x240 [nfsd]
[<ffffffffa029e594>] svc_process_common+0x344/0x640 [sunrpc]
[<ffffffff8105e7f0>] ? default_wake_function+0x0/0x20
[<ffffffffa029ebd0>] svc_process+0x110/0x160 [sunrpc]
[<ffffffffa0330b62>] nfsd+0xc2/0x160 [nfsd]
[<ffffffffa0330aa0>] ? nfsd+0x0/0x160 [nfsd]
[<ffffffff81090726>] kthread+0x96/0xa0
[<ffffffff8100c14a>] child_rip+0xa/0x20
[<ffffffff81090690>] ? kthread+0x0/0xa0
[<ffffffff8100c140>] ? child_rip+0x0/0x20
Code: 00 8b 15 cc 36 a4 00 85 d2 74 34 48 8b 50 08 8b 5a 18 48 8b 90 10 09 00 00 48 8b 4a 50 48 85 c9 74 1d 48 63 db 66 90 48 8b 51 20 <48> 03 14 dd a0 6d bf 81 4c 01 2a 48 8b 49 78 48 85 c9 75 e8 48
RIP [<ffffffff81052814>] update_curr+0x144/0x1f0
RSP <ffff880028283db8>
CR2: ffffffffca096da0
Initializing cgroup subsys cpuset
|