diff --git a/libcfs/include/libcfs/list.h b/libcfs/include/libcfs/list.h --- a/libcfs/include/libcfs/list.h +++ b/libcfs/include/libcfs/list.h @@ -22,6 +22,8 @@ typedef struct list_head cfs_list_t; #define cfs_list_empty(head) list_empty(head) #define cfs_list_empty_careful(head) list_empty_careful(head) +#define cfs_list_is_singular(head) list_is_singular(head) + #define __cfs_list_splice(list, head) __list_splice(list, head) #define cfs_list_splice(list, head) list_splice(list, head) @@ -219,7 +221,7 @@ static inline void cfs_list_move_tail(cfs_list_t *list, * Test whether a list is empty * \param head the list to test. */ -static inline int cfs_list_empty(cfs_list_t *head) +static inline int cfs_list_empty(const cfs_list_t *head) { return head->next == head; } @@ -242,6 +244,15 @@ static inline int cfs_list_empty_careful(const cfs_list_t *head) return (next == head) && (next == head->prev); } +/** + * Tests whether a list has just one entry. + * \param head: the head of the list to test. 
+ */ +static inline int cfs_list_is_singular(const cfs_list_t *head) +{ + return !cfs_list_empty(head) && (head->next == head->prev); +} + static inline void __cfs_list_splice(cfs_list_t *list, cfs_list_t *head) { diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -75,6 +75,7 @@ #include #include #include +#include #include #include @@ -1082,6 +1083,20 @@ struct ptlrpc_service { svc_req_printfn_t srv_req_printfn; /** @} */ + /** NRS-related fields */ + /** Protects NRS-related data */ + cfs_spinlock_t srv_nrs_lock; + /** List of available policies */ + cfs_list_t srv_nrs_policies; + /** Currently active NRS policy */ + struct nrs_pol_svc_info *srv_nrs_current; + /** NRS request-related operations; avoids lots of dereferencing */ + const struct nrs_pol_req_ops *srv_nrs_req_ops; + /** NRS request-related operations for the secondary policy */ + const struct nrs_pol_req_ops *srv_nrs_sec_req_ops; + /** NRS proc entry */ + cfs_proc_dir_entry_t *srv_nrs_proc_entry; + /** Root of /proc dir tree for this service */ cfs_proc_dir_entry_t *srv_procroot; /** Pointer to statistic data for this service */ @@ -1475,6 +1490,7 @@ struct ptlrpc_service *ptlrpc_init_svc_conf(struct ptlrpc_service_conf *c, svc_handler_t h, char *name, struct proc_dir_entry *proc_entry, svc_req_printfn_t prntfn, + const struct nrs_pol_supp *nrs_pols, char *threadname); struct ptlrpc_service *ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, @@ -1486,6 +1502,7 @@ struct ptlrpc_service *ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, svc_req_printfn_t, int min_threads, int max_threads, char *threadname, __u32 ctx_tags, + const struct nrs_pol_supp *nrs_pols, svc_hpreq_handler_t); void ptlrpc_stop_all_threads(struct ptlrpc_service *svc); @@ -1497,6 +1514,7 @@ void ptlrpc_daemonize(char *name); int ptlrpc_service_health_check(struct ptlrpc_service *); void ptlrpc_hpreq_reorder(struct ptlrpc_request 
*req); void ptlrpc_server_drop_request(struct ptlrpc_request *req); +int ptlrpc_server_normal_pending(struct ptlrpc_service *svc, int force); #ifdef __KERNEL__ int ptlrpc_hr_init(void); diff --git a/lustre/include/lustre_nrs.h b/lustre/include/lustre_nrs.h --- /dev/null +++ b/lustre/include/lustre_nrs.h @@ -0,0 +1,376 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * TODO: Copyright and licensing boilerplates to be inserted here. + * + */ +#ifndef _LUSTRE_NRS_H +#define _LUSTRE_NRS_H + +#include + +/** \defgroup nrs nrs + * + * NRS core. + * + * The Network Request Scheduler (NRS) is a software component inside the PTLRPC + * layer, that allows to employ different 'policies' which can be used to tailor + * the manner in which RPCs are dispatched from a PTLRPC service, in order to + * achieve an 'effect'. In the common case this may be increased throughput seen + * at server nodes by reordering RPCs in order to present a workload to the + * underlying disk I/O scheduler that will ultimately result in fewer disk seeks, + * in server nodes that employ rotating media in their targets. However, other + * applications are also possible, such as forming the basis of a storage QoS + * mechanism, or providing service prioritization amongst filesystem clients. + * + * \see http://wiki.lustre.org/index.php/Architecture_-_Network_Request_Scheduler + * \see https://bugzilla.lustre.org/show_bug.cgi?id=13634 + * + * @{ + */ + +#define NRS_POLICY_FIFO_NAME "FIFO" +#define NRS_POLICY_OBJ_EXTENTS_NAME "OBJ_EXTENTS" /* Note: Currently unused. */ +#define NRS_POLICY_DEFAULT_NAME NRS_POLICY_FIFO_NAME + +struct ptlrpc_service; +struct ptlrpc_request; + +struct nrs_pol_supp; +struct nrs_policy; +struct nrs_pol_svc_info; + +extern const struct nrs_pol_supp nrs_pol_supp_def; + +/** + * Callers of \a ptlrpc_init_svc and \a ptlrpc_init_svc_conf can use this to + * specify that a service only supports the default NRS policy. 
+ * + * \see ldlm_setup(), mgs_setup(), ost_setup() + */ +#define NRS_POL_SUPP_DEF (&nrs_pol_supp_def) + +/** + * Policy flags. + */ +enum nrs_pol_flags { + /** + * No special flags. + * + * NOTE(review): this is bit 0 (value 1), not 0; confirm that is the + * intended encoding for an empty flag set. + */ + NPF_NONE = (1 << 0), + /** + * Policy is a dominant one. This gives the policy higher priority + * compared to other policies, when selecting a policy for a service + * at service initialization time. + * + * \see nrs_svc_choose_init_policy() + */ + NPF_DOMINANT = (1 << 1), + NPF_MAX = (1 << 2) +}; + +/** + * A policy at a service can be in any of \a nrs_pol_svc_state states; only + * one policy can be in the \a ACTIVE state at any given time; transitions + * occur either at service initialization time when choosing an initial policy + * for the service, or during normal service operation time, triggered by + * interaction with the NRS proc interface. + * + * \see nrs_svc_choose_init_policy(), lprocfs_svc_wr_active() + * + * \verbatim + * + * State Transitions: + * _ + * |$| + * | + * | nrs_svc_get_policies() + * | + * | nrs_svc_choose_init_policy(), + * V lprocfs_svc_wr_active(), + * +------+-------------------------->AVAILABLE----------------------------+ + * | | ^ | + * | | | | + * | | | | + * | | | | + * | | | | + * | | | | + * | | lprocfs_svc_set_state() | lprocfs_svc_wr_active() | + * | | lprocfs_svc_set_state() | | + * | DISABLED<-----------------------ACTIVE<-----------------------------+ + * | | | + * | | | + * | | | + * | | lprocfs_svc_set_state() | + * | +-------------------------------+ | + * | | | + * | lprocfs_svc_wr_active() | | + * +--------------------------------- SECONDARY----------------------------+ + * + * + * \endverbatim + * + * \note These should be kept in accordance with \a nrs_policy_state_names in + * \a nrs_state_name(). + */ +enum nrs_pol_svc_state { + NPS_INVALID, + /** + * The policy is actively being used to handle the types of RPCs the + * policy supports for the service. 
+ */ + NPS_ACTIVE, + /** + * The policy is being used as the secondary policy; i.e. the ACTIVE + * policy has indicated that this policy is to be used to handle any + * RPC types that the ACTIVE policy does not support directly. + */ + NPS_SECONDARY, + /** + * The policy is AVAILABLE to act as the ACTIVE, or SECONDARY policy. + */ + NPS_AVAILABLE, + /** + * Although the policy has registered with NRS, it is in a state where + * it can not be used reliably, for some reason. + * + * TODO: Extend this to be triggered by writing to a proc file. + */ + NPS_DISABLED, + NPS_NR +}; + +/** + * Contains request-related operations that each NRS policy is \a required to + * implement. + * + * \note Request operations are called without a check for NULL, in order to + * cater for speed. At present, there is no provision for influencing + * handling of the high-priority RPC queue of a service. + */ +struct nrs_pol_req_ops { + /** + * Adds normal priority requests for later processing. + * + * \pre cfs_spin_is_locked(&svc->srv_rq_lock). + * + * \see nrs_fifo_req_add() + */ + void (*request_add)(struct ptlrpc_service *svc, + struct ptlrpc_request *req); + + /** + * Obtains a request that is ready for handling. + * + * \retval +ve (success). + * \retval NULL (failure). + * + * \see nrs_fifo_req_normal_get(). + * + * \note In case where the primary and secondary policies are queueing + * requests in different data structures, the \a request_get() callback + * needs to cater for supplying requests from both sources; only one + * callback is used in order to cater for performance, and simplicity + * inside NRS core; if policies need to handle requests in their own + * data structures, whilst making use of a secondary policy that queues + * requests in the normal service request queue, it is best if they + * cater for this situation themselves, by use of their own heuristics + * for serving requests. 
+ */ + struct ptlrpc_request * (*request_get)(struct ptlrpc_service *svc); + + /** + * Checks whether a non high-priority request is pending to be handled + * by a service thread. + * + * Policies can use this to signify there are one or more requests + * that have passed the pre-processing stage and are ready to be + * handled. + * + * \param svc service. + * \param force can be used to force the result of the operation to + * true. + * + * \retval 0 request is not pending. + * \retval 1 request is pending. + */ + int (*request_is_pending) (struct ptlrpc_service *svc, int force); +}; + +/** + * Policy-specific management operations. + * + * \note svc_cb_t functions are called at discrete points such as service + * initialization/teardown, and on some of the \a nrs_pol_svc_state state + * transitions. However, svc_cb_t callbacks for a policy's secondary policy + * are ignored for now. + * + * \see nrs_fifo_policy. + */ +struct nrs_pol_mgmt_ops { + /** + * Called upon policy registration with NRS core. + * + * \retval 0 (success). + * \retval -ve (failure). + */ + int (*pol_register) (void *pol_data); + + /** + * Called upon policy initialization at a service. + * + * \retval 0 (success). + * \retval -ve (failure, policy-specific failure condition). + */ + int (*svc_init) (struct ptlrpc_service *svc, + struct nrs_pol_svc_info *pol_svc_info); + + /** + * Called upon policy exit. + * TODO: Specify when exactly this happens, perhaps + * ptlrpc_unregister_service(). + */ + void (*svc_exit) (struct ptlrpc_service *svc, + struct nrs_pol_svc_info *pol_svc_info); + + /** + * Called when a policy transitions into the \a NPS_ACTIVE state. + * + * TODO: Should active take pol_info? It only needs np_private. + * + * \note callback should not sleep, and should be kept rather short + * if possible. + */ + int (*svc_active) (struct ptlrpc_service *svc, + struct nrs_pol_svc_info *pol_svc_info); + + /** + * Called when a policy transitions from the \a NPS_ACTIVE state. 
+ * + * \note callback should not sleep, and should be kept rather short + * if possible. + */ + void (*svc_inactive) (struct ptlrpc_service *svc, + struct nrs_pol_svc_info *pol_svc_info); +}; + +/** + * Used by NRS core to maintain a list of registered policies. + * + * Secondary Policies: + * + * \a np_sec_pol allows a policy to denote that it will be making use of a + * secondary policy in order to handle types of RPCs that it does not wish to + * handle directly. The range of RPCs that are handled directly by the policy + * is determined by the policy internally. A secondary policy is used for + * handling the non-supported types of RPCs at the end of the pre-processing + * stage, in \a ptlrpc_nrs_req_add() and when obtaining an RPC to be handled + * in ptlrpc_nrs_req_get(). + * + * It is possible for policies in the ACTIVE state and the policies they make + * use of as their secondary policies to be queueing requests in separate data + * structures. The task of determining whether requests are available for handling + * and the act of dequeuing requests are handled by the policy in the ACTIVE + * state, in ptlrpc_nrs_normal_pending() and ptlrpc_nrs_req_get(); this is in + * order to avoid adding redundant calls in the case where only the default + * (FIFO) policy is being used, or the ACTIVE and secondary policy make use of + * common data structures, and can thus use the same set of related callbacks; + * the complexity of providing an arbitration mechanism when required, is + * transferred to the ACTIVE policy, as it is likely to be somewhat policy- + * specific anyway. + */ +struct nrs_policy { + /** Policy request-related operations. */ + const struct nrs_pol_req_ops np_req_ops; + /** Policy management-related operations. */ + const struct nrs_pol_mgmt_ops np_mgmt_ops; + /** Policy name. */ + const char *np_name; + /** Secondary policy, for unsupported RPCs */ + const struct nrs_policy *np_sec_pol; + /** Policy flags. 
*/ + const enum nrs_pol_flags np_flags; + /** Chains off at the global nrs_core_info::nci_list. */ + cfs_list_t np_list; + /** Service-wide private policy data. */ + void *np_private; +}; + +/** + * Per-policy information block at a service. + * + * This is what services use to keep track of the status of a policy they are + * making use of. + */ +struct nrs_pol_svc_info { + /** Chains at ptlrpc_service::srv_nrs_policies. */ + cfs_list_t nsi_list; + /** Policy this structure refers to. */ + const struct nrs_policy *nsi_policy; + /** Policy state. */ + enum nrs_pol_svc_state nsi_state; + /** Per-service private policy data TODO: Change this to avoid derefs */ + void *nsi_private; +}; + +/** + * Policy support descriptor. + * + * Used to describe the set of _supported_ policies by a PTLRPC service. + * + * \a nrs_pol_supp structures are not meant to be used to describe the list of + * available policies, but rather used to describe the set of supported + * policies; this is then checked against the list of available policies in + * NRS core in \a nrs_svc_get_policies() at service initialization time, in + * order to produce the list of available policies for the service at + * ptlrpc_service::srv_nrs_policies. + * + * \see ptlrpc_init_svc(), nrs_svc_get_policies(), nrs_pol_supp_def. + */ +struct nrs_pol_supp { + /** Number of policies supported by the service. */ + __u8 nps_num_pols; + /** Names of policies supported by the service. */ + const char *nps_pol_names[]; +}; + +/* NRS core functions. */ +int nrs_policy_register(struct nrs_policy *pol); +int nrs_svc_get_policies(const struct nrs_pol_supp *pols, + struct ptlrpc_service *svc); +int nrs_svc_choose_init_policy(struct ptlrpc_service *svc); + +/* Init/fini functions. */ +int __init nrs_init(void); +void __exit nrs_fini(void); + +/* PTLRPC thread operations. 
*/ +void ptlrpc_nrs_req_add(struct ptlrpc_service *svc, + struct ptlrpc_request *req); +int ptlrpc_nrs_normal_pending(struct ptlrpc_service *svc, int force); +struct ptlrpc_request * ptlrpc_nrs_req_get(struct ptlrpc_service *svc); + +/* Default FIFO policy operations. */ +void nrs_fifo_req_add(struct ptlrpc_service *svc, + struct ptlrpc_request *req); +struct ptlrpc_request * nrs_fifo_req_normal_get(struct ptlrpc_service *svc); + +/** + * No-op management callback operations for the default (FIFO) policy. + */ +static inline int nrs_fifo_mgmt_noop(void *data){ return 0; } +static inline void nrs_fifo_mgmt_svc_noop(struct ptlrpc_service *svc, + struct nrs_pol_svc_info *pol_svc_info) + { } +static inline int nrs_fifo_mgmt_svc_ret_noop(struct ptlrpc_service *svc, + struct nrs_pol_svc_info *pol_svc_info) + { return 0; } +#ifdef LPROCFS +int nrs_lprocfs_svc_register(struct ptlrpc_service *svc); +#else +static inline int nrs_lprocfs_svc_register(struct ptlrpc_service *svc) { return 0; } +#endif + +/** @} nrs */ + +#endif /* _LUSTRE_NRS_H */ diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -51,6 +51,7 @@ #endif #include +#include #include #include #include "ldlm_internal.h" @@ -2458,8 +2459,8 @@ static int ldlm_setup(void) ldlm_callback_handler, "ldlm_cbd", ldlm_svc_proc_dir, NULL, ldlm_min_threads, ldlm_max_threads, - "ldlm_cb", - LCT_MD_THREAD|LCT_DT_THREAD, NULL); + "ldlm_cb", LCT_MD_THREAD|LCT_DT_THREAD, + NRS_POL_SUPP_DEF, NULL); if (!ldlm_state->ldlm_cb_service) { CERROR("failed to start service\n"); @@ -2475,7 +2476,7 @@ static int ldlm_setup(void) ldlm_min_threads, ldlm_max_threads, "ldlm_cn", LCT_MD_THREAD|LCT_DT_THREAD|LCT_CL_THREAD, - NULL); + NRS_POL_SUPP_DEF, NULL); if (!ldlm_state->ldlm_cancel_service) { CERROR("failed to start service\n"); diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -73,6 
+73,7 @@ #include #include #include +#include mdl_mode_t mdt_mdl_lock_modes[] = { [LCK_MINMODE] = MDL_MINMODE, @@ -3898,7 +3899,7 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m) m->mdt_regular_service = ptlrpc_init_svc_conf(&conf, mdt_regular_handle, LUSTRE_MDT_NAME, procfs_entry, target_print_req, - LUSTRE_MDT_NAME); + NRS_POL_SUPP_DEF, LUSTRE_MDT_NAME); if (m->mdt_regular_service == NULL) RETURN(-ENOMEM); @@ -3925,7 +3926,8 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m) m->mdt_readpage_service = ptlrpc_init_svc_conf(&conf, mdt_readpage_handle, LUSTRE_MDT_NAME "_readpage", - procfs_entry, target_print_req,"mdt_rdpg"); + procfs_entry, target_print_req, + NRS_POL_SUPP_DEF, "mdt_rdpg"); if (m->mdt_readpage_service == NULL) { CERROR("failed to start readpage service\n"); @@ -3953,7 +3955,8 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m) m->mdt_setattr_service = ptlrpc_init_svc_conf(&conf, mdt_regular_handle, LUSTRE_MDT_NAME "_setattr", - procfs_entry, target_print_req,"mdt_attr"); + procfs_entry, target_print_req, + NRS_POL_SUPP_DEF, "mdt_attr"); if (!m->mdt_setattr_service) { CERROR("failed to start setattr service\n"); @@ -3983,7 +3986,8 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m) m->mdt_mdsc_service = ptlrpc_init_svc_conf(&conf, mdt_mdsc_handle, LUSTRE_MDT_NAME"_mdsc", - procfs_entry, target_print_req,"mdt_mdsc"); + procfs_entry, target_print_req, + NRS_POL_SUPP_DEF, "mdt_mdsc"); if (!m->mdt_mdsc_service) { CERROR("failed to start seq controller service\n"); GOTO(err_mdt_svc, rc = -ENOMEM); @@ -4012,7 +4016,8 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m) m->mdt_mdss_service = ptlrpc_init_svc_conf(&conf, mdt_mdss_handle, LUSTRE_MDT_NAME"_mdss", - procfs_entry, target_print_req,"mdt_mdss"); + procfs_entry, target_print_req, + NRS_POL_SUPP_DEF, "mdt_mdss"); if (!m->mdt_mdss_service) { CERROR("failed to start metadata seq server service\n"); GOTO(err_mdt_svc, rc = -ENOMEM); @@ -4044,7 +4049,8 @@ 
static int mdt_start_ptlrpc_service(struct mdt_device *m) m->mdt_dtss_service = ptlrpc_init_svc_conf(&conf, mdt_dtss_handle, LUSTRE_MDT_NAME"_dtss", - procfs_entry, target_print_req,"mdt_dtss"); + procfs_entry, target_print_req, + NRS_POL_SUPP_DEF, "mdt_dtss"); if (!m->mdt_dtss_service) { CERROR("failed to start data seq server service\n"); GOTO(err_mdt_svc, rc = -ENOMEM); @@ -4071,7 +4077,8 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m) m->mdt_fld_service = ptlrpc_init_svc_conf(&conf, mdt_fld_handle, LUSTRE_MDT_NAME"_fld", - procfs_entry, target_print_req, "mdt_fld"); + procfs_entry, target_print_req, + NRS_POL_SUPP_DEF, "mdt_fld"); if (!m->mdt_fld_service) { CERROR("failed to start fld service\n"); GOTO(err_mdt_svc, rc = -ENOMEM); @@ -4100,7 +4107,8 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m) m->mdt_xmds_service = ptlrpc_init_svc_conf(&conf, mdt_xmds_handle, LUSTRE_MDT_NAME "_mds", - procfs_entry, target_print_req,"mdt_xmds"); + procfs_entry, target_print_req, + NRS_POL_SUPP_DEF, "mdt_xmds"); if (m->mdt_xmds_service == NULL) { CERROR("failed to start xmds service\n"); diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -60,6 +60,7 @@ #include #include "mgs_internal.h" #include +#include /* Establish a connection to the MGS.*/ static int mgs_connect(const struct lu_env *env, @@ -243,7 +244,8 @@ static int mgs_setup(struct obd_device *obd, struct lustre_cfg *lcfg) mgs_handle, LUSTRE_MGS_NAME, obd->obd_proc_entry, target_print_req, MGS_THREADS_AUTO_MIN, MGS_THREADS_AUTO_MAX, - "ll_mgs", LCT_MD_THREAD, NULL); + "ll_mgs", LCT_MD_THREAD, NRS_POL_SUPP_DEF, + NULL); if (!mgs->mgs_service) { CERROR("failed to start service\n"); diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -55,6 +55,7 @@ #include #include #include +#include #include "ost_internal.h" static int oss_num_threads; @@ 
-2584,7 +2585,8 @@ static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg) ost_handle, LUSTRE_OSS_NAME, obd->obd_proc_entry, target_print_req, oss_min_threads, oss_max_threads, - "ll_ost", LCT_DT_THREAD, NULL); + "ll_ost", LCT_DT_THREAD, NRS_POL_SUPP_DEF, + NULL); if (ost->ost_service == NULL) { CERROR("failed to start service\n"); GOTO(out_lprocfs, rc = -ENOMEM); @@ -2613,7 +2615,8 @@ static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg) ost_handle, "ost_create", obd->obd_proc_entry, target_print_req, oss_min_create_threads, oss_max_create_threads, - "ll_ost_creat", LCT_DT_THREAD, NULL); + "ll_ost_creat", LCT_DT_THREAD, NRS_POL_SUPP_DEF, + NULL); if (ost->ost_create_service == NULL) { CERROR("failed to start OST create service\n"); GOTO(out_service, rc = -ENOMEM); @@ -2630,7 +2633,8 @@ static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg) ost_handle, "ost_io", obd->obd_proc_entry, target_print_req, oss_min_threads, oss_max_threads, - "ll_ost_io", LCT_DT_THREAD, ost_hpreq_handler); + "ll_ost_io", LCT_DT_THREAD, NRS_POL_SUPP_DEF, + ost_hpreq_handler); if (ost->ost_io_service == NULL) { CERROR("failed to start OST I/O service\n"); GOTO(out_create, rc = -ENOMEM); diff --git a/lustre/ptlrpc/Makefile.in b/lustre/ptlrpc/Makefile.in --- a/lustre/ptlrpc/Makefile.in +++ b/lustre/ptlrpc/Makefile.in @@ -13,7 +13,7 @@ ptlrpc_objs += events.o ptlrpc_module.o service.o pinger.o recov_thread.o ptlrpc_objs += llog_net.o llog_client.o llog_server.o import.o ptlrpcd.o ptlrpc_objs += pers.o lproc_ptlrpc.o wiretest.o layout.o ptlrpc_objs += sec.o sec_bulk.o sec_gc.o sec_config.o sec_lproc.o -ptlrpc_objs += sec_null.o sec_plain.o target.o +ptlrpc_objs += sec_null.o sec_plain.o target.o nrs.o ptlrpc-objs := $(ldlm_objs) $(ptlrpc_objs) diff --git a/lustre/ptlrpc/autoMakefile.am b/lustre/ptlrpc/autoMakefile.am --- a/lustre/ptlrpc/autoMakefile.am +++ b/lustre/ptlrpc/autoMakefile.am @@ -50,7 +50,7 @@ LDLM_COMM_SOURCES= 
$(top_srcdir)/lustre/ldlm/l_lock.c \ COMMON_SOURCES = client.c recover.c connection.c niobuf.c pack_generic.c \ events.c ptlrpc_module.c service.c pinger.c recov_thread.c llog_net.c \ - llog_client.c llog_server.c import.c ptlrpcd.c pers.c wiretest.c \ + llog_client.c llog_server.c import.c ptlrpcd.c pers.c wiretest.c nrs.c \ ptlrpc_internal.h layout.c sec.c sec_bulk.c sec_gc.c sec_config.c \ sec_lproc.c sec_null.c sec_plain.c lproc_ptlrpc.c $(LDLM_COMM_SOURCES) @@ -83,6 +83,7 @@ ptlrpc_SOURCES := \ llog_server.c \ lproc_ptlrpc.c \ niobuf.c \ + nrs.c \ pack_generic.c \ pers.c \ pinger.c \ diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c --- a/lustre/ptlrpc/lproc_ptlrpc.c +++ b/lustre/ptlrpc/lproc_ptlrpc.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include "ptlrpc_internal.h" @@ -671,6 +672,11 @@ void ptlrpc_lprocfs_register_service(struct proc_dir_entry *entry, 0400, &req_history_fops, svc); if (rc) CWARN("Error adding the req_history file\n"); + + rc = nrs_lprocfs_svc_register(svc); + if (rc) + CWARN("Error %d setting up nrs lprocfs entries for service %s" + "\n", rc, svc->srv_name); } void ptlrpc_lprocfs_register_obd(struct obd_device *obddev) @@ -725,6 +731,9 @@ EXPORT_SYMBOL(ptlrpc_lprocfs_brw); void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc) { + if (svc->srv_nrs_proc_entry != NULL) + lprocfs_remove(&svc->srv_nrs_proc_entry); + if (svc->srv_procroot != NULL) lprocfs_remove(&svc->srv_procroot); diff --git a/lustre/ptlrpc/nrs.c b/lustre/ptlrpc/nrs.c --- /dev/null +++ b/lustre/ptlrpc/nrs.c @@ -0,0 +1,1062 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * TODO: Copyright and licensing boilerplates to be inserted here. + * + */ + +/** + * NRS core. 
+ * + * \addtogroup nrs nrs + * + * @{ + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_RPC + +#ifdef __KERNEL__ +#include +#include +#include +#else +#include +#endif +#include +#include +#include +#include +#include + +#define NRS_POL_NAME_MAX (32) +#define NRS_PROC_NAME "nrs" +#define NRS_LPROCFS_RD_POLICIES_HDR "name\n" +#define NRS_LPROCFS_NO_POL_MSG "no registered policies\n" +#define NRS_LPROCFS_RD_SVC_POLICIES_HDR "name state\n" +#define NRS_LPROCFS_NO_SVC_POL_MSG "no available policies\n" +#define NRS_LPROCFS_NO_ACTIVE_POL "no active policy\n" + +static struct nrs_pol_svc_info * nrs_get_pol_info(struct ptlrpc_service *svc, + const char *pol_name); + +#define nrs_currentpolinfo(svc) (svc)->srv_nrs_current +#define nrs_currentpol(svc) nrs_currentpolinfo(svc)->nsi_policy +#define nrs_pol2mops(pol) (pol)->np_mgmt_ops +#define nrs_polinfo2mops(pol_info) (pol_info)->nsi_policy->np_mgmt_ops +#define nrs_svc2ops(svc) (svc)->srv_nrs_req_ops +#define nrs_svc2secops(svc) (svc)->srv_nrs_sec_req_ops +#define nrs_currentname(svc) nrs_currentpol(svc)->np_name +#define nrs_polinfo2name(pol_info) (pol_info)->nsi_policy->np_name +#define nrs_polinfo2sec(pol_info) (pol_info)->nsi_policy->np_sec_pol +#define nrs_polinfo2secname(pol_info) nrs_polinfo2sec(pol_info)->np_name +#define nrs_currentmops(svc) nrs_pol2mops(nrs_currentpol(svc)) +#define nrs_currentsecpol(svc) nrs_currentpol(svc)->np_sec_pol +#define nrs_currentsecmops(svc) nrs_currentsecpol(svc)->np_mgmt_ops +#define nrs_currentsecname(svc) nrs_currentsecpol(svc)->np_name +#define nrs_currentsecinfo(svc) nrs_get_pol_info(svc, nrs_currentsecname(svc)) +#define nrs_polinfo2secinfo(svc, pol_info) nrs_get_pol_info(svc, \ + nrs_polinfo2secname(pol_info)) + +/** + * Holds NRS core information. + * + * Maintains a list of registered policies and related information. 
+ */ +static struct { + __u8 nci_num_pols; + cfs_list_t nci_list; + cfs_proc_dir_entry_t *nci_proc_entry; + cfs_mutex_t nci_mutex; +} nrs_core_info; + +/** + * Finds whether a policy has been registered with NRS core, given the policy + * name. + * + * \param pol_name policy name. + * + * \retval 0 (policy has not been registered). + * \retval 1 (policy has already been registered). + */ +static int nrs_policy_exists(const char *pol_name) +{ + int rc = 0; + struct nrs_policy *pol_iter; + ENTRY; + + LASSERT(pol_name); + + cfs_mutex_lock(&nrs_core_info.nci_mutex); + if (!nrs_core_info.nci_num_pols) + GOTO(out, rc = 0); + cfs_list_for_each_entry(pol_iter, &nrs_core_info.nci_list, np_list) + if (!strcmp(pol_iter->np_name, pol_name)) { + rc = 1; + break; + } +out: + cfs_mutex_unlock(&nrs_core_info.nci_mutex); + RETURN(rc); +} + +/** + * Registers a policy with NRS core. + * + * Adds the policy registration information in \a pol to the + * nrs_core_info::nci_list list. + * + * \param pol policy. + * + * \retval 0 (success). + * \retval -EEXIST (failure, policy has already been registered). + * \retval -EINVAL (failure, policy name too large). + * \retval -ve (failure, policy-specific failure condition). 
+ */
+int nrs_policy_register(struct nrs_policy *pol)
+{
+        struct nrs_policy *pol_iter;
+        int                rc;
+        ENTRY;
+
+        LASSERT(pol);
+        /* np_name is handed to strlen() and strcmp() below. */
+        LASSERT(pol->np_name);
+        LASSERT(pol->np_flags <= NPF_MAX);
+
+        if (strlen(pol->np_name) > NRS_POL_NAME_MAX) {
+                CERROR("Name for NRS policy %s too long\n",
+                       pol->np_name);
+                GOTO(out, rc = -EINVAL);
+        }
+
+        /*
+         * Cheap early rejection of an obvious duplicate, before the policy's
+         * own registration callback is invoked; the authoritative check is
+         * repeated below with nci_mutex held.
+         */
+        if (nrs_policy_exists(pol->np_name))
+                GOTO(out_exists, rc = -EEXIST);
+
+        /*
+         * Keep nci_mutex held from the duplicate check up to the insertion
+         * into nci_list; checking and inserting under two separate lock
+         * acquisitions would allow two concurrent registrations of the same
+         * name to both pass the check and both be added to the list.
+         *
+         * As a consequence pol_register() runs with nci_mutex held, and must
+         * not call back into NRS policy registration.
+         */
+        cfs_mutex_lock(&nrs_core_info.nci_mutex);
+        cfs_list_for_each_entry(pol_iter, &nrs_core_info.nci_list, np_list)
+                if (!strcmp(pol_iter->np_name, pol->np_name)) {
+                        cfs_mutex_unlock(&nrs_core_info.nci_mutex);
+                        GOTO(out_exists, rc = -EEXIST);
+                }
+
+        rc = nrs_pol2mops(pol).pol_register(pol->np_private);
+        if (rc) {
+                cfs_mutex_unlock(&nrs_core_info.nci_mutex);
+                CERROR("Policy registration for "
+                       "policy %s failed with error %d\n",
+                       pol->np_name, rc);
+                GOTO(out, rc);
+        }
+        cfs_list_add_tail(&pol->np_list, &nrs_core_info.nci_list);
+        nrs_core_info.nci_num_pols++;
+        cfs_mutex_unlock(&nrs_core_info.nci_mutex);
+        GOTO(out, rc);
+
+out_exists:
+        CERROR("Attempted to register NRS policy %s more than once\n",
+               pol->np_name);
+out:
+        RETURN(rc);
+}
+EXPORT_SYMBOL(nrs_policy_register);
+
+/**
+ * Adds an \a nrs_pol_svc_info struct corresponding to NRS policy \a pol, to the
+ * list of supported policies by service \a svc at
+ * ptlrpc_service::srv_nrs_policies.
+ *
+ * \param svc service.
+ * \param pol NRS policy.
+ *
+ * \retval 0 (success).
+ * \retval -ENOMEM (failure, OOM).
+ * \retval -ve (failure, policy-specific failure condition).
+ *
+ * \note Is called with nrs_core_info::nci_mutex held.
+ */ +static int nrs_svc_pol_alloc_add(struct ptlrpc_service *svc, + struct nrs_policy *pol) +{ + int rc = 0; + struct nrs_pol_svc_info *pol_info; + ENTRY; + + LASSERT(svc); + LASSERT(pol); + + OBD_ALLOC_PTR(pol_info); + if (!pol_info) { + CERROR("Failed to allocate memory for " + "nrs_pol_svc_info struct for " + "service %s\n", svc->srv_name); + RETURN(-ENOMEM); + } + /* + * NOTE(review): svc_init() is invoked before nsi_policy and nsi_state + * are assigned below, so the callback cannot rely on them; confirm + * this ordering is intended. + */ + rc = nrs_pol2mops(pol).svc_init(svc, pol_info); + if (rc) { + OBD_FREE_PTR(pol_info); + GOTO(out, rc); + } + pol_info->nsi_policy = pol; + pol_info->nsi_state = NPS_AVAILABLE; + cfs_spin_lock(&svc->srv_nrs_lock); + cfs_list_add_tail(&pol_info->nsi_list, &svc->srv_nrs_policies); + cfs_spin_unlock(&svc->srv_nrs_lock); +out: + RETURN(rc); +} + +/** + * Obtains the policies that are available to a service. + * + * Uses the passed \a nrs_pol_supp structure at \a pols to populate a list of + * supported policies for service \a svc, backed by \a nrs_pol_svc_info + * structures with ptlrpc_service::srv_nrs_policies as the head of the list. + * + * \param pols supported policies descriptor. + * \param svc service to get the policies for. + * + * \retval 0 (success, one or more supported policies are available). + * \retval -EINVAL (failure, invalid \a nrs_pol_supp structure given). + * \retval -ENODEV (failure, no available policies). + * \retval -EALREADY (failure, service list already populated). + * \retval -ENOMEM (failure, OOM error). + * \retval -ve (failure, policy-specific failure condition) + */ +int nrs_svc_get_policies(const struct nrs_pol_supp *pols, + struct ptlrpc_service *svc) +{ + int rc = 0, i; + __u8 pols_found = 0; + struct nrs_policy *pol_iter; + ENTRY; + + LASSERT(svc); + LASSERT(pols); + LASSERT(ergo(pols->nps_num_pols, pols->nps_pol_names[0])); + + if (unlikely(!pols->nps_num_pols)) + RETURN(-EINVAL); + /* + * Accessing list without ptlrpc_service::srv_nrs_lock held; + * there should not be any contention here, as the function is only + * used at service initialization time. 
+ */ + if (unlikely(!cfs_list_empty(&svc->srv_nrs_policies))) + RETURN(-EALREADY); + cfs_mutex_lock(&nrs_core_info.nci_mutex); + if (unlikely((!nrs_core_info.nci_num_pols))) + GOTO(out, rc = -ENODEV); + cfs_list_for_each_entry(pol_iter, &nrs_core_info.nci_list, np_list) + for (i = 0; i < pols->nps_num_pols; i++) + if (!strcmp(pol_iter->np_name, pols->nps_pol_names[i])) { + rc = nrs_svc_pol_alloc_add(svc, pol_iter); + if (!rc) + pols_found = 1; + else + /* + * Fail the operation if any of the + * policies fails to initialize; this + * can be changed, although this is + * the only initialization point, and + * policies should not fail unless + * there is a very good reason to do so. + */ + GOTO(out_dealloc, rc); + } + if (!pols_found) + rc = -ENODEV; + GOTO(out, rc); +out_dealloc: + /* + * Accessing list without ptlrpc_service::srv_nrs_lock held; + * there should not be any contention here, as the function is only + * used at service initialization time. + */ + if (!(cfs_list_empty(&svc->srv_nrs_policies))) { + struct nrs_pol_svc_info *pol_info, *next; + cfs_list_for_each_entry_safe(pol_info, next, + &svc->srv_nrs_policies, nsi_list) { + nrs_polinfo2mops(pol_info).svc_exit(svc, pol_info); + cfs_list_del(&pol_info->nsi_list); + OBD_FREE_PTR(pol_info); + } + } +out: + cfs_mutex_unlock(&nrs_core_info.nci_mutex); + RETURN(rc); +} +EXPORT_SYMBOL(nrs_svc_get_policies); + +/** + * Performs the assignments necessary to use the policy corresponding to + * \a pol_info as the ACTIVE policy for service \a svc. + * + * \param svc service. + * \param pol_info per-service per-policy information. + * \param setup_sec whether to perform the setup steps for the secondary as + * well. 
+ */ +static inline void nrs_setup_active(struct ptlrpc_service *svc, + struct nrs_pol_svc_info *pol_info, + __u8 setup_sec) +{ + ENTRY; + + LASSERT(svc); + LASSERT(pol_info); + LASSERT(ergo(setup_sec, nrs_polinfo2sec(pol_info) && + !IS_ERR(nrs_polinfo2secinfo(svc, pol_info)))); + + pol_info->nsi_state = NPS_ACTIVE; + svc->srv_nrs_current = pol_info; + svc->srv_nrs_req_ops = &pol_info->nsi_policy->np_req_ops; + if (setup_sec) { + nrs_polinfo2secinfo(svc, pol_info)->nsi_state = NPS_SECONDARY; + nrs_svc2secops(svc) = &nrs_polinfo2sec(pol_info)->np_req_ops; + } + EXIT; +} + +/** + * Makes the policy backed by \a pol_info the currently active policy for + * the service \a svc. + * + * This is used either for setting an active policy during service + * initialization, or during normal service operation, when e.g. a service + * policy is changed by the appropriate proc filesystem operation; + * + * \param svc service to act upon. + * \param pol_info nrs_pol_svc_info structure for policy to be made active. + * \param is_init, when set, this is a call at service initialization time, + * when unset, it is a call at normal service operation time. + * + * \retval 0 (success). + * \retval -EALREADY (failure, policy already active). + * \retval -ve (failure, policy-specific condition). + * + * \note Assumes \a pol_info has been checked by \a nrs_policy_exists. + */ +static int nrs_policy_make_active(struct ptlrpc_service *svc, + struct nrs_pol_svc_info *pol_info, + __u8 is_init) +{ + int rc = 0; + int rc_err; + __u8 handle_sec = 0; + ENTRY; + + LASSERT(svc); + LASSERT(pol_info->nsi_state != NPS_DISABLED); + LASSERT(ergo(is_init, !svc->srv_nrs_current && !nrs_svc2ops(svc))); + + if (!is_init) { + /* + * This is a call at normal service operation time. + */ + /* Serialize the whole operation. */ + cfs_spin_lock(&svc->srv_nrs_lock); + /* + * Return if the requested policy is already active; there is + * no provision for only changing the secondary policy. 
+ */ + if (pol_info == nrs_currentpolinfo(svc)) + GOTO(out_unlock_nrs, rc = -EALREADY); + /* + * Check whether a switch is also needed on the secondary policy + */ + if (nrs_polinfo2sec(pol_info) != nrs_currentsecpol(svc)) + handle_sec = 1; + /* + * Need to avoid handling any requests whilst the policy is + * changing. + */ + cfs_spin_lock(&svc->srv_rq_lock); + + /* + * Call the current policies' nrs_pol_mgmt_ops::svc_inactive + * callbacks. + */ + nrs_currentmops(svc).svc_inactive(svc, + nrs_currentpolinfo(svc)); + if (handle_sec && nrs_currentsecpol(svc)) { + nrs_currentsecmops(svc).svc_inactive(svc, + nrs_currentsecinfo(svc)); + } + + /* + * Call the new policies' nrs_pol_mgmt_ops::svc_active + * callbacks. + */ + rc = nrs_polinfo2mops(pol_info).svc_active(svc, pol_info); + if (rc) { + CERROR("Policy-specific activation function for" + "policy %.32s on service %s failed\n", + nrs_polinfo2name(pol_info), + svc->srv_name); + GOTO(out_prim, rc); + } + if (handle_sec && nrs_polinfo2sec(pol_info)) { + if (IS_ERR(nrs_polinfo2secinfo(svc, pol_info))) { + CERROR("Request to use secondary policy %s " + "which is not available at service %s\n", + nrs_polinfo2secname(pol_info), + svc->srv_name); + GOTO(out_sec, rc); + } + rc = nrs_pol2mops(nrs_polinfo2sec(pol_info)).svc_active + (svc, nrs_polinfo2secinfo(svc, pol_info)); + if (rc) { + CERROR("Policy-specific activation function for" + "policy %.32s on service %s failed\n", + nrs_polinfo2secname(pol_info), + svc->srv_name); + GOTO(out_sec, rc); + } + } + /* + * Secondary policy name is not printed out, as the operation + * was initiated by the used by only passing the primary policy + * name. + */ + CDEBUG(D_INFO, "NRS policy %s is replacing %s on service %s\n", + nrs_polinfo2name(pol_info), nrs_currentname(svc), + svc->srv_name); + /* Set the state for the previous policies to AVAILABLE. 
*/ + svc->srv_nrs_current->nsi_state = NPS_AVAILABLE; + if (handle_sec && nrs_currentsecpol(svc)) + nrs_currentsecinfo(svc)->nsi_state = NPS_AVAILABLE; + /* Setup the service to make use of the new policies. */ + nrs_setup_active(svc, pol_info, handle_sec && + nrs_polinfo2sec(pol_info)); + } else { + /* + * This is a call at service initialization time; + * srv_nrs_lock is not required, as there is no contention. + */ + rc = nrs_polinfo2mops(pol_info).svc_active(svc, pol_info); + if (rc) { + CERROR("Policy-specific activation function for" + "policy %.32s on service %s failed\n", + nrs_polinfo2name(pol_info), + svc->srv_name); + GOTO(out, rc); + } + if (nrs_polinfo2sec(pol_info)) { + if (IS_ERR(nrs_polinfo2secinfo(svc, pol_info))) { + CERROR("Request to use secondary policy %s " + "which is not available at service %s\n", + nrs_polinfo2secname(pol_info), + svc->srv_name); + GOTO(out, rc); + } + rc = nrs_polinfo2sec(pol_info)->np_mgmt_ops.svc_active + (svc, nrs_polinfo2secinfo(svc, pol_info)); + if (rc) { + CERROR("Policy-specific activation function for" + " policy %.32s on service %s failed\n", + nrs_polinfo2secname(pol_info), + svc->srv_name); + GOTO(out, rc); + } + } + nrs_setup_active(svc, pol_info, + nrs_polinfo2sec(pol_info) != NULL); + GOTO(out, rc = 0); + } +out_sec: + nrs_polinfo2mops(pol_info).svc_inactive(svc, pol_info); +out_prim: + /* + * Re-activate the previously ACTIVE and SECONDARY (if there was one) + * policies. + */ + if (handle_sec && nrs_currentsecpol(svc)) { + rc_err = nrs_currentsecmops(svc).svc_active(svc, + nrs_currentsecinfo(svc)); + if (rc_err) + CERROR("Error %d in re-activating policy %s as the " + "secondary policy after policy switch failure, " + "on service %s. 
Server policy configuration may " + "have been impaired\n", rc_err, + nrs_currentsecname(svc), svc->srv_name); + } + rc_err = nrs_currentmops(svc).svc_active(svc, nrs_currentpolinfo(svc)); + if (rc_err) + CERROR("Error %d in re-activating policy %s as the primary " + "policy after policy switch failure, on service %s. " + "Server policy configuration may have been impaired\n", + rc_err, nrs_currentname(svc), svc->srv_name); + + cfs_spin_unlock(&svc->srv_rq_lock); +out_unlock_nrs: + cfs_spin_unlock(&svc->srv_nrs_lock); +out: + RETURN(rc); +} + +/** + * Chooses one of the available policies to be made the currently active one at + * service initialization time. + * + * Uses ptlrpc_service::srv_nrs_policies to pick the policy out of the list + * of available policies for the service, based on policy state and presence + * of the \a NPF_DOMINANT flag. Populates ptlrpc_service::srv_nrs_current and + * ptlrpc_service::srv_nrs_req_ops. + * + * \param svc service to choose initial policy for. + * + * \retval 0 (success). + * \retval -ENOENT (failure, no available policies exist for the service). + */ +int nrs_svc_choose_init_policy(struct ptlrpc_service *svc) +{ + int rc = -ENOENT; + struct nrs_pol_svc_info *pol_info = NULL, *pol_info_dom = NULL, + *pol_iter; + ENTRY; + + LASSERT(svc); + + cfs_spin_lock(&svc->srv_nrs_lock); + if (unlikely(cfs_list_empty(&svc->srv_nrs_policies))) + GOTO(out, rc); + if (cfs_list_is_singular(&svc->srv_nrs_policies)) { + pol_info = cfs_list_entry(svc->srv_nrs_policies.next, + struct nrs_pol_svc_info, + nsi_list); + GOTO(out, rc = 0); + } + cfs_list_for_each_entry(pol_iter, &svc->srv_nrs_policies, nsi_list) + /* + * If a policy is found with the NPF_DOMINANT flag set, use + * that policy without traversing the list further. + */ + if ((pol_iter->nsi_policy->np_flags & NPF_DOMINANT) && + pol_iter->nsi_state == NPS_AVAILABLE) { + pol_info_dom = pol_iter; + break; + } + /* + * Use the first non dominant policy that is found on the list. 
+ */ + else if (!pol_info && pol_iter->nsi_state == NPS_AVAILABLE) + pol_info = pol_iter; +out: + cfs_spin_unlock(&svc->srv_nrs_lock); + if (likely(pol_info || pol_info_dom)) { + rc = nrs_policy_make_active(svc, pol_info_dom ? : pol_info, 1); + if (rc) + CERROR("Failed to make policy %s active on service %s\n", + pol_info_dom ? nrs_polinfo2name(pol_info_dom) + : nrs_polinfo2name(pol_info), + svc->srv_name); + } + RETURN(rc); +} +EXPORT_SYMBOL(nrs_svc_choose_init_policy); + +/* + * PTLRPC thread-related operations. + */ + +/** + * Adds a request to the normal request queue (or equivalent construct for a + * given policy). + * + * Calls the \a request_add() operation of the policy in the ACTIVE state, + * in order to handle addition of the request at \a req. + * + * \note Policies in the ACTIVE state that are making use of a secondary policy + * to handle certain types of RPCs, need to cater for properly adding requests that + * are being handled by both policies, by delegating the \a request_add() + * operation to the secondary policy's request_add() operation, for types of + * RPCs that are not supported by the active policy. + * + * \param svc service. + * \param req request to add. + */ +inline void ptlrpc_nrs_req_add(struct ptlrpc_service *svc, + struct ptlrpc_request *req) +{ + ENTRY; + LASSERT(svc); + LASSERT(req); + + CDEBUG(D_INFO, "Handling request addition via NRS policy %s on service " + "%s\n", nrs_currentname(svc), svc->srv_name); + nrs_svc2ops(svc)->request_add(svc, req); + EXIT; +} +EXPORT_SYMBOL(ptlrpc_nrs_req_add); + +/** + * Checks whether one or more normal-priority requests are pending to be + * handled after the pre-processing stage. + * + * \note Policies in the ACTIVE state that are making use of a secondary + * policy to handle RPC types they do not offer support for, need to properly + * indicate whether a request is pending that is being handled by either + * policy. 
This has been done in order to avoid performing a second callback + * inside NRS core, for performance reasons. + * + * \param svc service. + * \param force used to force a positive outcome. + * + * \retval 0 (request is not pending). + * \retval 1 (request is pending). + */ +inline int ptlrpc_nrs_normal_pending(struct ptlrpc_service *svc, + int force) +{ + LASSERT(svc); + ENTRY; + + RETURN(nrs_svc2ops(svc)->request_is_pending(svc, force)); +} +EXPORT_SYMBOL(ptlrpc_nrs_normal_pending); + +/** + * Obtains a request for handling using the current policy's \a request_get + * operation. + * + * The request can either come from the normal request queue for the service, + * or a similar source, handled by internally by the policy. + * + * \note Policies in the ACTIVE state that are making use of a secondary policy + * to handle certain types of RPCs, need to cater for servicing of requests that + * are being handled by both policies, either by dequeuing requests from a + * data structure that is used to queue RPCs handled by both policies + * (e.g ptlrpc_service:srv_request_queue) or by use of an arbitration mechanism + * to handle the dequeuing of requests. + * + * \param svc service. + * + * \retval +ve (a valid request). + */ +inline struct ptlrpc_request * ptlrpc_nrs_req_get(struct ptlrpc_service *svc) +{ + LASSERT(svc); + ENTRY; + + CDEBUG(D_INFO, "Obtaining request for handling via NRS policy " + "%s on service %s\n", nrs_currentname(svc), + svc->srv_name); + RETURN(nrs_svc2ops(svc)->request_get(svc)); +} +EXPORT_SYMBOL(ptlrpc_nrs_req_get); + +/* + * Default FIFO policy operations. + */ + +/** + * Default nrs_pol_req_ops::request_add() implementation. Adds a request to the + * service's normal request queue. + * + * \param svc service. + * \param req request. + * + * \pre cfs_spin_is_locked(&svc->srv_rq_lock). 
+ */ +inline void nrs_fifo_req_add(struct ptlrpc_service *svc, + struct ptlrpc_request *req) +{ + ENTRY; + + LASSERT(svc); + LASSERT(req); + + cfs_list_add_tail(&req->rq_list, &svc->srv_request_queue); + EXIT; +} +EXPORT_SYMBOL(nrs_fifo_req_add); + +/** + * Default nrs_pol_req_ops::request_get() implementation. Obtains a request from + * the service's normal request queue. + * + * \param svc service. + * + * \retval +ve (a dequeued request, ready to be handled). + * + * \pre cfs_spin_is_locked(&svc->srv_rq_lock). + */ +inline +struct ptlrpc_request * nrs_fifo_req_normal_get(struct ptlrpc_service *svc) +{ + struct ptlrpc_request *req; + ENTRY; + + LASSERT(svc); + + req = cfs_list_entry(svc->srv_request_queue.next, struct ptlrpc_request, + rq_list); + svc->srv_hpreq_count = 0; + RETURN(req); +} +EXPORT_SYMBOL(nrs_fifo_req_normal_get); + +/** + * Default (FIFO) nrs_pol_req_ops::request_is_pending() implementation. Checks + * whether a request is available for handling on the service's normal request + * queue. + * + * \param svc service. + * \param force used to force a decision in \a ptlrpc_server_allow_normal. + * + * \retval 0 (request is not pending). + * \retval 1 (request is pending). + */ +inline int nrs_fifo_req_is_pending(struct ptlrpc_service *svc, int force) +{ + ENTRY; + + LASSERT(svc); + + RETURN(ptlrpc_server_normal_pending(svc, force)); +} +EXPORT_SYMBOL(nrs_fifo_req_is_pending); + +/** + * Returns a pointer to the \a nrs_pol_svc_info for the service at \a svc, + * given the policy name. + * + * \retval +ve (success). + * \retval -EINVAL (failure, policy has not been registered). + * \retval -ENOENT (failure, policy not available for the service). 
+ */ +static struct nrs_pol_svc_info * nrs_get_pol_info(struct ptlrpc_service *svc, + const char *pol_name) +{ + struct nrs_pol_svc_info *pol_info; + ENTRY; + + LASSERT(svc); + LASSERT(pol_name); + + if (!(nrs_policy_exists(pol_name))) + RETURN(ERR_PTR(-EINVAL)); + cfs_spin_lock(&svc->srv_nrs_lock); + cfs_list_for_each_entry(pol_info, &svc->srv_nrs_policies, nsi_list) + if (!(strcmp(nrs_polinfo2name(pol_info), pol_name))) { + cfs_spin_unlock(&svc->srv_nrs_lock); + goto out; + } + cfs_spin_unlock(&svc->srv_nrs_lock); + pol_info = ERR_PTR(-ENOENT); +out: + RETURN(pol_info); +} + +/** + * Default (FIFO) NRS policy \a nrs_policy instance. + */ +struct nrs_policy nrs_fifo_policy = { + .np_req_ops = { + .request_add = nrs_fifo_req_add, + .request_get = nrs_fifo_req_normal_get, + .request_is_pending = nrs_fifo_req_is_pending + }, + .np_mgmt_ops = { + .pol_register = nrs_fifo_mgmt_noop, + .svc_init = nrs_fifo_mgmt_svc_ret_noop, + .svc_exit = nrs_fifo_mgmt_svc_noop, + .svc_active = nrs_fifo_mgmt_svc_ret_noop, + .svc_inactive = nrs_fifo_mgmt_svc_noop + }, + .np_name = NRS_POLICY_FIFO_NAME, + .np_sec_pol = NULL, + .np_flags = NPF_NONE, + .np_private = NULL +}; + +#define NRS_POL_DEFAULT (&nrs_fifo_policy) +#define NRS_NUM_POL_SUPP_DEF (1) + +/** + * \a nrs_pol_supp struct for default (FIFO) policy. + * + * \see NRS_POL_SUPP_DEF. + */ +const struct nrs_pol_supp nrs_pol_supp_def = { + .nps_num_pols = NRS_NUM_POL_SUPP_DEF, + .nps_pol_names = { + [0] = NRS_POLICY_DEFAULT_NAME + } +}; +EXPORT_SYMBOL(nrs_pol_supp_def); + +/* + * Lprocfs operations. + */ +#ifdef LPROCFS + +/** + * Obtains a description of all policies registered with NRS. + * + * \warning The total print out should not exceed the \a CFS_PAGE_SIZE. 
+ */ +static int lprocfs_rd_policies(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + int rc; + struct nrs_policy *pol_iter; + + *eof = 1; + cfs_mutex_lock(&nrs_core_info.nci_mutex); + if (unlikely(cfs_list_empty(&nrs_core_info.nci_list))) { + rc = snprintf(page, count, NRS_LPROCFS_NO_POL_MSG); + goto out; + } + rc = snprintf(page, count, NRS_LPROCFS_RD_POLICIES_HDR); + cfs_list_for_each_entry(pol_iter, &nrs_core_info.nci_list, np_list); + rc += snprintf(page, count, "%.32s\n", pol_iter->np_name); +out: + cfs_mutex_unlock(&nrs_core_info.nci_mutex); + return rc; +} + +/** + * Lprocfs entries registered with NRS core. + */ +static struct lprocfs_vars lprocfs_nrs_core_vars[] = { + { "policies", lprocfs_rd_policies, 0, 0 }, + { 0 } +}; + +/** + * Removes NRS core lprocfs entries. + */ +static void nrs_procfs_fini(void) +{ + ENTRY; + + if (nrs_core_info.nci_proc_entry) + lprocfs_remove(&nrs_core_info.nci_proc_entry); + EXIT; +} + +/** + * Initializes NRS core lprocfs variables. + * + * \retval 0 (initialization successful). + * \retval -ve (initialization failure). + */ +static int nrs_procfs_init(void) +{ + int rc; + ENTRY; + + nrs_core_info.nci_proc_entry = lprocfs_register(NRS_PROC_NAME, + proc_lustre_root, + NULL, NULL); + if (IS_ERR(nrs_core_info.nci_proc_entry)) { + rc = PTR_ERR(nrs_core_info.nci_proc_entry); + CERROR("Error %d in setting up lprocfs for NRS core\n", rc); + nrs_core_info.nci_proc_entry = NULL; + GOTO(out, rc); + } + rc = lprocfs_add_vars(nrs_core_info.nci_proc_entry, + lprocfs_nrs_core_vars, NULL); + if (rc) { + CERROR("Error %d in setting up NRS core lprocfs variables\n", + rc); + GOTO(out, rc); + } +out: + if (rc) + nrs_procfs_fini(); + RETURN(rc); +} + +/* + * Lprocfs service operations. + */ + +/** + * Returns a string containing the policy state name, given the policy state. + * + * \param state policy state. + * + * \retval +ve pointer to state name. 
+ */
+static inline const char * nrs_pol_state_name(enum nrs_pol_svc_state state)
+{
+        static const struct {
+                enum nrs_pol_svc_state   npsn_state;
+                const char              *npsn_state_name;
+        } nrs_policy_state_names[NPS_NR] = {
+                /*
+                 * N.B. The following should be kept in line with
+                 * nrs_pol_svc_state; the table is indexed by state value.
+                 */
+                { NPS_INVALID,   "Invalid" },
+                { NPS_ACTIVE,    "Active" },
+                { NPS_SECONDARY, "Secondary" },
+                { NPS_AVAILABLE, "Available" },
+                { NPS_DISABLED,  "Disabled" }
+        };
+        LASSERT(state < NPS_NR);
+        return(nrs_policy_state_names[state].npsn_state_name);
+}
+
+/**
+ * Read available service policies and their state at
+ * nrs_pol_svc_info::nsi_state.
+ *
+ * Prints a header followed by one row per policy on
+ * ptlrpc_service::srv_nrs_policies, bounded by \a count.
+ *
+ * \retval number of characters placed in \a page.
+ */
+static int lprocfs_svc_rd_policies(char *page, char **start, off_t off,
+                                   int count, int *eof, void *data)
+{
+        int                      rc;
+        struct nrs_pol_svc_info *pol_info;
+        struct ptlrpc_service   *svc = data;
+
+        LASSERT(svc);
+
+        *eof = 1;
+        cfs_spin_lock(&svc->srv_nrs_lock);
+        if (unlikely(cfs_list_empty(&svc->srv_nrs_policies))) {
+                rc = snprintf(page, count, NRS_LPROCFS_NO_SVC_POL_MSG);
+                goto out;
+        }
+        rc = snprintf(page, count, NRS_LPROCFS_RD_SVC_POLICIES_HDR);
+        cfs_list_for_each_entry(pol_info, &svc->srv_nrs_policies, nsi_list) {
+                /* Stop as soon as the caller's buffer has been filled. */
+                if (rc >= count)
+                        break;
+                /* Append after what has been written so far. */
+                rc += snprintf(page + rc, count - rc,
+                               "%-32.32s %-47.47s\n",
+                               nrs_polinfo2name(pol_info),
+                               nrs_pol_state_name(pol_info->nsi_state));
+        }
+out:
+        cfs_spin_unlock(&svc->srv_nrs_lock);
+        /*
+         * snprintf() reports the length the output would have had without
+         * truncation; never report more than what fits in the buffer.
+         */
+        if (rc >= count)
+                rc = count > 0 ? count - 1 : 0;
+        return rc;
+}
+
+/**
+ * Read the name of the currently active policy for a service.
+ */ +static int lprocfs_svc_rd_active(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + int rc; + struct ptlrpc_service *svc = data; + + LASSERT(svc); + + *eof = 1; + cfs_spin_lock(&svc->srv_nrs_lock); + if (!svc->srv_nrs_current || + svc->srv_nrs_current->nsi_state != NPS_ACTIVE) { + rc = snprintf(page, strlen(NRS_LPROCFS_NO_ACTIVE_POL), + NRS_LPROCFS_NO_ACTIVE_POL); + goto out; + } + rc = snprintf(page, NRS_POL_NAME_MAX, "%.32s", nrs_currentname(svc)); + /* TODO: Could optionally print the secondary policy here as well. */ +out: + cfs_spin_unlock(&svc->srv_nrs_lock); + return rc; +} + +/** + * Change the currently active policy for a service. + */ +static int lprocfs_svc_wr_active(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + int rc = 0; + struct ptlrpc_service *svc = data; + struct nrs_pol_svc_info *pol_info; + char pol_name[NRS_POL_NAME_MAX + 1]; + + LASSERT(svc); + + if (count > NRS_POL_NAME_MAX) + return -EINVAL; + if (cfs_copy_from_user(pol_name, buffer, count)) + return -EFAULT; + pol_name[count] = '\0'; + /* TODO: This may in theory be racy; need to serialize access to the + * pol_info struct to avoid having it removed from down under, before + * nrs_policy_make_active() returns; removal will probably only + * happen at ptlrpc_unregister_service(). + */ + pol_info = nrs_get_pol_info(svc, pol_name); + if (IS_ERR(pol_info)) + return -EINVAL; + rc = nrs_policy_make_active(svc, pol_info, 0); + if (rc) + CERROR("Failed to make NRS policy %s active for service %s\n", + nrs_polinfo2name(pol_info), svc->srv_name); + return rc ? : count; +} + +/** + * NRS lprocfs per-service variables. + */ +static struct lprocfs_vars lprocfs_nrs_svc_vars[] = { + { "policies", lprocfs_svc_rd_policies, 0, 0 }, + { "active_policy", lprocfs_svc_rd_active, lprocfs_svc_wr_active, 0 }, + { 0 } +}; + +/** + * Registers NRS per-service lprocfs variabless. 
+ * + * The nrs directory inside the proc root directory for each service allows + * access to NRS-related state for the service, and serves as a parent + * for additional proc entries that NRS policies may wish to create. + * + * \param svc the service to register the lprocfs variables for. + * + * \retval +ve (success). + * \retval ERR_PTR(errno) (failure). + */ +int nrs_lprocfs_svc_register(struct ptlrpc_service *svc) +{ + int rc = 0; + ENTRY; + + LASSERT(svc); + + svc->srv_nrs_proc_entry = lprocfs_register(NRS_PROC_NAME, svc->srv_procroot, + lprocfs_nrs_svc_vars, svc); + if (IS_ERR(svc->srv_nrs_proc_entry)) { + rc = PTR_ERR(svc->srv_nrs_proc_entry); + CERROR("Error %d setting up lprocfs NRS entries for service %s" + "\n", rc, svc->srv_name); + } + RETURN(rc); +} + +#else /* !LPROCFS */ +static int nrs_procfs_init(void) { return 0; } +static void nrs_procfs_fini(void) { } +#endif /* LPROCFS */ + +/** + * Registers the default NRS policy and NRS core lprocfs entries. + * + * \retval 0 (success). + * \retval -ve (failure). + */ +int __init nrs_init(void) +{ + int rc; + ENTRY; + + CFS_INIT_LIST_HEAD(&nrs_core_info.nci_list); + cfs_mutex_init(&nrs_core_info.nci_mutex); + rc = nrs_policy_register(NRS_POL_DEFAULT); + if (rc) { + CERROR("Unable to register default NRS policy, exiting!\n"); + GOTO(out, rc); + } + rc = nrs_procfs_init(); +out: + RETURN(rc); +} +EXPORT_SYMBOL(nrs_init); + +/** + * Exit function. 
+ * TODO:Incomplete
+ *
+ * Removes the NRS core lprocfs entries.
+ */
+void __exit nrs_fini(void)
+{
+        ENTRY;
+
+        nrs_procfs_fini();
+        EXIT;
+}
+EXPORT_SYMBOL(nrs_fini);
+
+/** @} nrs */
diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c --- a/lustre/ptlrpc/ptlrpc_module.c +++ b/lustre/ptlrpc/ptlrpc_module.c @@ -225,6 +225,7 @@ EXPORT_SYMBOL(ptlrpc_start_thread); EXPORT_SYMBOL(ptlrpc_unregister_service); EXPORT_SYMBOL(ptlrpc_service_health_check); EXPORT_SYMBOL(ptlrpc_hpreq_reorder); +EXPORT_SYMBOL(ptlrpc_server_normal_pending); /* pack_generic.c */ EXPORT_SYMBOL(lustre_msg_check_version); diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include "ptlrpc_internal.h" @@ -433,6 +434,7 @@ struct ptlrpc_service *ptlrpc_init_svc_conf(struct ptlrpc_service_conf *c, svc_handler_t h, char *name, struct proc_dir_entry *proc_entry, svc_req_printfn_t prntfn, + const struct nrs_pol_supp *nrs_pols, char *threadname) { return ptlrpc_init_svc(c->psc_nbufs, c->psc_bufsize, @@ -441,7 +443,7 @@ struct ptlrpc_service *ptlrpc_init_svc_conf(struct ptlrpc_service_conf *c, c->psc_watchdog_factor, h, name, proc_entry, prntfn, c->psc_min_threads, c->psc_max_threads, - threadname, c->psc_ctx_tags, NULL); + threadname, c->psc_ctx_tags, nrs_pols, NULL); } EXPORT_SYMBOL(ptlrpc_init_svc_conf); @@ -480,6 +482,7 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size, svc_req_printfn_t svcreq_printfn, int min_threads, int max_threads, char *threadname, __u32 ctx_tags, + const struct nrs_pol_supp *nrs_pols, svc_hpreq_handler_t hp_handler) { int rc; @@ -567,6 +570,19 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size, timeout is less than this, we'll be sending an early reply. */ at_init(&service->srv_at_estimate, 10, 0); + cfs_spin_lock_init(&service->srv_nrs_lock); + CFS_INIT_LIST_HEAD(&service->srv_nrs_policies); + + /* Get available NRS policies for this service.
*/ + rc = nrs_svc_get_policies(nrs_pols, service); + if (rc) + GOTO(failed, NULL); + /* TODO: add to ptlrpc_unregister service, look at early teardown. */ + /* Choose the NRS policy this service will start with. */ + rc = nrs_svc_choose_init_policy(service); + if (rc) + GOTO(failed, NULL); + cfs_spin_lock (&ptlrpc_all_services_lock); cfs_list_add (&service->srv_list, &ptlrpc_all_services); cfs_spin_unlock (&ptlrpc_all_services_lock); @@ -1318,8 +1333,7 @@ static int ptlrpc_server_request_add(struct ptlrpc_service *svc, if (rc) ptlrpc_hpreq_reorder_nolock(svc, req); else - cfs_list_add_tail(&req->rq_list, - &svc->srv_request_queue); + ptlrpc_nrs_req_add(svc, req); } cfs_spin_unlock(&svc->srv_rq_lock); @@ -1374,7 +1388,7 @@ static int ptlrpc_server_allow_normal(struct ptlrpc_service *svc, int force) return svc->srv_n_active_hpreq > 0 || svc->srv_hpreq_handler == NULL; } -static int ptlrpc_server_normal_pending(struct ptlrpc_service *svc, int force) +int ptlrpc_server_normal_pending(struct ptlrpc_service *svc, int force) { return ptlrpc_server_allow_normal(svc, force) && !cfs_list_empty(&svc->srv_request_queue); @@ -1392,7 +1406,7 @@ static inline int ptlrpc_server_request_pending(struct ptlrpc_service *svc, int force) { return ptlrpc_server_high_pending(svc, force) || - ptlrpc_server_normal_pending(svc, force); + ptlrpc_nrs_normal_pending(svc, force); } /** @@ -1414,10 +1428,8 @@ ptlrpc_server_request_get(struct ptlrpc_service *svc, int force) } - if (ptlrpc_server_normal_pending(svc, force)) { - req = cfs_list_entry(svc->srv_request_queue.next, - struct ptlrpc_request, rq_list); - svc->srv_hpreq_count = 0; + if (ptlrpc_nrs_normal_pending(svc, force)) { + req = ptlrpc_nrs_req_get(svc); RETURN(req); } RETURN(NULL);