Details
-
Bug
-
Resolution: Fixed
-
Major
-
Lustre 2.11.0, Lustre 2.10.8
-
None
-
3
-
9223372036854775807
Description
RCU stalls occurring in ptlrpc_check_set were tracked to a ptlrpcd daemon that was sleeping while holding an import lock spinlock.
PID: 12920 TASK: ffff880834b47040 CPU: 7 COMMAND: "ptlrpcd_03_02" #0 [ffffc90005d5bb20] __schedule at ffffffff816a067c #1 [ffffc90005d5bba8] __cond_resched_lock at ffffffff810855ea #2 [ffffc90005d5bbc0] __purge_vmap_area_lazy at ffffffff811a2210 #3 [ffffc90005d5bbf0] free_vmap_area_noflush at ffffffff811a22bd #4 [ffffc90005d5bc08] remove_vm_area at ffffffff811a4477 #5 [ffffc90005d5bc28] __vunmap at ffffffff811a44b6 #6 [ffffc90005d5bc50] vfree at ffffffff811a459e #7 [ffffc90005d5bc68] null_free_repbuf at ffffffffa08190d8 [ptlrpc] #8 [ffffc90005d5bc80] sptlrpc_cli_free_repbuf at ffffffffa080b3d0 [ptlrpc] #9 [ffffc90005d5bca0] __ptlrpc_req_finished at ffffffffa07cfb35 [ptlrpc] #10 [ffffc90005d5bcd0] ptlrpc_free_request at ffffffffa07d03ca [ptlrpc] #11 [ffffc90005d5bce8] ptlrpc_free_committed at ffffffffa07d2bca [ptlrpc] #12 [ffffc90005d5bd30] ptlrpc_check_set at ffffffffa07d5eb1 [ptlrpc] #13 [ffffc90005d5bdd0] ptlrpcd_check at ffffffffa08022dc [ptlrpc] #14 [ffffc90005d5be18] ptlrpcd at ffffffffa080284e [ptlrpc] #15 [ffffc90005d5bf08] kthread at ffffffff8107c167 #16 [ffffc90005d5bf50] ret_from_fork at ffffffff8180024a
Examining the code in ptlrpc_check_set()->after_reply() shows that ptlrpc_free_committed is called while holding the imp_lock.
1408 static int after_reply(struct ptlrpc_request *req)
1409 {
1410 struct obd_import *imp = req->rq_import;
.....
1551 if (imp->imp_replayable) {
1552 spin_lock(&imp->imp_lock);
1553 /*
1554 * No point in adding already-committed requests to the replay
1555 * list, we will just remove them immediately. b=9829
1556 */
1557 if (req->rq_transno != 0 &&
1558 (req->rq_transno >
1559 lustre_msg_get_last_committed(req->rq_repmsg) ||
1560 req->rq_replay)) {
1561 /** version recovery */
1562 ptlrpc_save_versions(req);
1563 ptlrpc_retain_replayable_request(req, imp);
1564 } else if (req->rq_commit_cb &&
1565 list_empty(&req->rq_replay_list)) {
1566 /*
1567 * NB: don't call rq_commit_cb if it's already on
1568 * rq_replay_list, ptlrpc_free_committed() will call
1569 * it later, see LU-3618 for details
1570 */
1571 spin_unlock(&imp->imp_lock);
1572 req->rq_commit_cb(req);
1573 spin_lock(&imp->imp_lock);
1574 }
1575
1576 /*
1577 * Replay-enabled imports return commit-status information.
1578 */
1579 committed = lustre_msg_get_last_committed(req->rq_repmsg);
1580 if (likely(committed > imp->imp_peer_committed_transno))
1581 imp->imp_peer_committed_transno = committed;
1582
1583 ptlrpc_free_committed(imp);
.....
1599 spin_unlock(&imp->imp_lock);
1600 }
1601
1602 RETURN(rc);
1603 }
This causes a deadlock when another ptlrpcd daemon spins on a CPU trying to get the lock and the holder of the lock gets scheduled on the same CPU.
crash_x86_64> bt -t 12918
PID: 12918 TASK: ffff880825b3a1c0 CPU: 7 COMMAND: "ptlrpcd_03_00"
START: __schedule at ffffffff816a067c
[ffffc9000536bd10] lustre_msg_set_transno at ffffffffa07e1df7 [ptlrpc]
[ffffc9000536bd20] _raw_spin_lock at ffffffff816a5951 <--- waiting on imp_lock
[ffffc9000536bd30] ptlrpc_check_set at ffffffffa07d5e3e [ptlrpc]
[ffffc9000536bd88] schedule_timeout at ffffffff816a4459
[ffffc9000536bdd0] ptlrpcd_check at ffffffffa08022dc [ptlrpc]
[ffffc9000536be18] ptlrpcd at ffffffffa080284e [ptlrpc]
[ffffc9000536be58] default_wake_function at ffffffff81086380
[ffffc9000536bf08] kthread at ffffffff8107c167
[ffffc9000536bf18] ptlrpcd at ffffffffa08024d0 [ptlrpc]
[ffffc9000536bf20] kthread at ffffffff8107c040
[ffffc9000536bf50] ret_from_fork at ffffffff8180024a
crash_x86_64> bt -l 12918 | grep -A 1 ptlrpc_check_set
#2 [ffffc9000536bd30] ptlrpc_check_set at ffffffffa07d5e3e [ptlrpc]
/home/abuild/rpmbuild/BUILD/cray-lustre-2.11.0.200_cray_90_g022bd19/lustre/ptlrpc/client.c: 1474
ptlrpcd_03_00 is trying to get the imp_lock; ptlrpd_03_02 holds it:
CPU 7 RUNQUEUE: ffff88085f9e1f00
CURRENT: PID: 12918 TASK: ffff880825b3a1c0 COMMAND: "ptlrpcd_03_00"
RT PRIO_ARRAY: ffff88085f9e2140
[no tasks queued]
CFS RB_ROOT: ffff88085f9e1fb0
[120] PID: 3968 TASK: ffff88050760afc0 COMMAND: "kworker/7:0"
[120] PID: 13361 TASK: ffff88083f701080 COMMAND: "gunicorn"
[120] PID: 32734 TASK: ffff880333e06480 COMMAND: "tail"
[120] PID: 13384 TASK: ffff880837144140 COMMAND: "gunicorn"
[120] PID: 15161 TASK: ffff880825ea1240 COMMAND: "sssd_nss"
[120] PID: 14128 TASK: ffff88076005a040 COMMAND: "mongod"
[120] PID: 10737 TASK: ffff880836f86240 COMMAND: "cron"
[120] PID: 44 TASK: ffff88082372b000 COMMAND: "ksoftirqd/7"
[120] PID: 12920 TASK: ffff880834b47040 COMMAND: "ptlrpcd_03_02"