Details
-
Bug
-
Resolution: Fixed
-
Critical
-
Lustre 2.4.0, Lustre 2.1.5, Lustre 1.8.9
-
no specific environment is needed
-
3
-
6971
Description
When ptlrpc_start_thread fails to create a new thread, it finalizes and frees the struct ptlrpc_thread that it allocated and linked onto svcpt->scp_threads. This becomes a problem when ptlrpc_svcpt_stop_threads runs and handles that same struct ptlrpc_thread right before or right after cfs_create_thread fails.
In this situation, both ptlrpc_start_thread and ptlrpc_svcpt_stop_threads can access the already-freed ptlrpc_thread, which causes an OS panic.
ptlrpc_start_thread
int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait) { ... spin_lock(&svcpt->scp_lock); ... cfs_list_add(&thread->t_link, &svcpt->scp_threads); spin_unlock(&svcpt->scp_lock); if (svcpt->scp_cpt >= 0) { snprintf(thread->t_name, PTLRPC_THR_NAME_LEN, "%s%02d_%03d", svc->srv_thread_name, svcpt->scp_cpt, thread->t_id); } else { snprintf(thread->t_name, PTLRPC_THR_NAME_LEN, "%s_%04d", svc->srv_thread_name, thread->t_id); } CDEBUG(D_RPCTRACE, "starting thread '%s'\n", thread->t_name); /* * CLONE_VM and CLONE_FILES just avoid a needless copy, because we * just drop the VM and FILES in cfs_daemonize_ctxt() right away. */ rc = cfs_create_thread(ptlrpc_main, thread, CFS_DAEMON_FLAGS); if (rc < 0) { CERROR("cannot start thread '%s': rc %d\n", thread->t_name, rc); ////////////////////////////////////// // <---- let's say when // ptlrpc_svcpt_stop_thread is driven here ////////////////////////////////////// spin_lock(&svcpt->scp_lock); cfs_list_del(&thread->t_link); --svcpt->scp_nthrs_starting; spin_unlock(&svcpt->scp_lock); OBD_FREE(thread, sizeof(*thread)); RETURN(rc); } ... }
ptlrpc_svcpt_stop_threads
/*
 * Ask every thread linked on svcpt->scp_threads to stop, wait until each one
 * reports stopped, then free its struct ptlrpc_thread.
 *
 * Steps, as written below:
 *  1. Under svcpt->scp_lock, flag every listed thread SVC_STOPPING and
 *     broadcast on svcpt->scp_waitq.
 *  2. Drain scp_threads from its head: an entry for which
 *     thread_is_stopped() holds is moved to the local "zombie" list;
 *     otherwise scp_lock is dropped and the caller waits on that entry's
 *     t_ctl_waitq until thread_is_stopped() holds, then retakes the lock
 *     and re-examines the head.
 *  3. After releasing scp_lock, free every entry collected on "zombie".
 *
 * NOTE(review): in step 2 the "thread" pointer is dereferenced after
 * scp_lock has been dropped (the CDEBUG and the wait). That is only safe if
 * no other path unlinks and frees entries of scp_threads while this function
 * is waiting - confirm against every code path that removes entries from
 * this list.
 */
static void ptlrpc_svcpt_stop_threads(struct ptlrpc_service_part *svcpt)
{
	struct l_wait_info lwi = { 0 };
	struct ptlrpc_thread *thread;
	/* entries already stopped, collected here so they are freed unlocked */
	CFS_LIST_HEAD (zombie);

	ENTRY;

	CDEBUG(D_INFO, "Stopping threads for service %s\n",
	       svcpt->scp_service->srv_name);

	spin_lock(&svcpt->scp_lock);
	/* let the thread know that we would like it to stop asap */
	list_for_each_entry(thread, &svcpt->scp_threads, t_link) {
		CDEBUG(D_INFO, "Stopping thread %s #%u\n",
		       svcpt->scp_service->srv_thread_name, thread->t_id);
		thread_add_flags(thread, SVC_STOPPING);
	}

	cfs_waitq_broadcast(&svcpt->scp_waitq);

	/* always look at the current list head; it is re-read on every pass */
	while (!cfs_list_empty(&svcpt->scp_threads)) {
		thread = cfs_list_entry(svcpt->scp_threads.next,
					struct ptlrpc_thread, t_link);
		if (thread_is_stopped(thread)) {
			/* already stopped: unlink now, free after unlocking */
			cfs_list_del(&thread->t_link);
			cfs_list_add(&thread->t_link, &zombie);
			continue;
		}
		/* the lock is released before the wait below */
		spin_unlock(&svcpt->scp_lock);

		CDEBUG(D_INFO, "waiting for stopping-thread %s #%u\n",
		       svcpt->scp_service->srv_thread_name, thread->t_id);
		l_wait_event(thread->t_ctl_waitq,
			     thread_is_stopped(thread), &lwi);

		/* retake the lock; the next pass moves the entry to zombie */
		spin_lock(&svcpt->scp_lock);
	}

	spin_unlock(&svcpt->scp_lock);

	/* zombie is local to this call, so it is walked without scp_lock */
	while (!cfs_list_empty(&zombie)) {
		thread = cfs_list_entry(zombie.next,
					struct ptlrpc_thread, t_link);
		cfs_list_del(&thread->t_link);
		OBD_FREE_PTR(thread);
	}
	EXIT;
}