As part of the LU-10709 problem/deadlock analysis, it has been found that user-land processes intensively using sysfs can cause a deadlock if, while doing so, memory reclaim is triggered and, as part of it, FS-specific shrinkers are run that directly or indirectly involve layers (like MD/RAID) which also rely on sysfs. To fix this, sysfs inode allocation must no longer use the generic GFP_KERNEL way but must instead be done with GFP_NOFS, to prevent any FS operations from interfering during possible reclaim. Signed-off-by: Bruno Faccini --- orig/fs/inode.c 2017-09-09 07:06:42.000000000 +0000 +++ bfi/fs/inode.c 2018-03-14 09:24:48.533380200 +0000 @@ -73,7 +73,7 @@ struct inodes_stat_t inodes_stat; static DEFINE_PER_CPU(unsigned int, nr_inodes); static DEFINE_PER_CPU(unsigned int, nr_unused); -static struct kmem_cache *inode_cachep __read_mostly; +struct kmem_cache *inode_cachep __read_mostly; static int get_nr_inodes(void) { --- orig/fs/sysfs/sysfs.h 2017-09-09 07:06:42.000000000 +0000 +++ bfi/fs/sysfs/sysfs.h 2018-03-14 09:24:48.534380233 +0000 @@ -211,6 +211,8 @@ static inline void __sysfs_put(struct sy */ struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd); void sysfs_evict_inode(struct inode *inode); +extern struct kmem_cache *inode_cachep; +struct inode *sysfs_alloc_inode(struct super_block *sb); int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr); int sysfs_permission(struct inode *inode, int mask); int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); --- orig/fs/sysfs/mount.c 2017-09-09 07:06:42.000000000 +0000 +++ bfi/fs/sysfs/mount.c 2018-03-14 09:24:48.534380233 +0000 @@ -31,6 +31,7 @@ static const struct super_operations sys .statfs = simple_statfs, .drop_inode = generic_delete_inode, .evict_inode = sysfs_evict_inode, + .alloc_inode = sysfs_alloc_inode, }; struct sysfs_dirent sysfs_root = { --- orig/fs/sysfs/inode.c 2017-09-09 07:06:42.000000000 +0000 +++ bfi/fs/sysfs/inode.c 2018-03-14 09:24:48.534380233 +0000 @@ -314,6 +314,17 @@ 
void sysfs_evict_inode(struct inode *ino sysfs_put(sd); } +/* + * As a new inode allocation occurs with sysfs_mutex held and memory reclaim + * can be triggered doing so, this needs to happen with FS operations disabled + * to avoid any deadlock between shrinkers and FS/device layers doing + * extensive use of sysfs (like MD/Raid) as part of their operations. + */ +struct inode *sysfs_alloc_inode(struct super_block *sb) +{ + return kmem_cache_alloc(inode_cachep, GFP_NOFS); +} + int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const char *name) { struct sysfs_addrm_cxt acxt;