[21039.004239] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x135d1:0x0]// may get corrupted (rc -108) [21039.004242] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x135d2:0x0]// may get corrupted (rc -108) [21039.004257] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd8b6:0x0]// may get corrupted (rc -108) [21039.004259] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd8b5:0x0]// may get corrupted (rc -108) [21039.004260] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1349a:0x0]// may get corrupted (rc -108) [21039.004262] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf5ee:0x0]// may get corrupted (rc -108) [21039.004267] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf611:0x0]// may get corrupted (rc -108) [21039.004288] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13632:0x0]// may get corrupted (rc -108) [21039.004290] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc43b:0x0]// may get corrupted (rc -108) [21039.004293] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd8d5:0x0]// may get corrupted (rc -108) [21039.004295] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13644:0x0]// may get corrupted (rc -108) [21039.004298] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf63a:0x0]// may get corrupted (rc -108) [21039.004299] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf5c5:0x0]// may get corrupted (rc -108) [21039.004300] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcaf7:0x0]// may get corrupted (rc -108) [21039.004323] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf668:0x0]// may get corrupted (rc -108) [21039.004328] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13939:0x0]// may get corrupted (rc -108) [21039.004330] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1369e:0x0]// may get corrupted (rc -108) [21039.004332] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc0db:0x0]// may get corrupted (rc -108) [21039.004337] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13622:0x0]// may get corrupted (rc -108) [21039.004356] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x136d4:0x0]// may get corrupted (rc -108) [21039.004358] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc443:0x0]// may get corrupted (rc -108) [21039.004359] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x136d3:0x0]// may get corrupted (rc -108) [21039.004361] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcb01:0x0]// may get corrupted (rc -108) [21039.004366] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcae2:0x0]// may get corrupted (rc -108) [21039.004369] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x136f5:0x0]// may get corrupted (rc -108) [21039.004387] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd90e:0x0]// may get corrupted (rc -108) [21039.004389] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcb10:0x0]// may get corrupted (rc -108) [21039.004394] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf6a6:0x0]// may get corrupted (rc -108) [21039.004397] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf6aa:0x0]// may get corrupted (rc -108) [21039.004401] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13625:0x0]// may get corrupted (rc -108) [21039.004404] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd90d:0x0]// may get corrupted (rc -108) [21039.004405] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd925:0x0]// may get corrupted (rc -108) [21039.004407] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xbf36:0x0]// may get corrupted (rc -108) [21039.004409] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13718:0x0]// may get corrupted (rc -108) [21039.004410] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf67a:0x0]// may get corrupted (rc -108) [21039.004411] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd8c6:0x0]// may get corrupted (rc -108) [21039.004427] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc0f2:0x0]// may get corrupted (rc -108) [21039.004430] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1384a:0x0]// may get corrupted (rc -108) [21039.004439] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13899:0x0]// may get corrupted (rc -108) [21039.004440] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf755:0x0]// may get corrupted (rc -108) [21039.004442] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf66b:0x0]// may get corrupted (rc -108) [21039.004443] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd951:0x0]// may get corrupted (rc -108) [21039.004445] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1389f:0x0]// may get corrupted (rc -108) [21039.004447] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13938:0x0]// may get corrupted (rc -108) [21039.004449] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd90f:0x0]// may get corrupted (rc -108) [21039.004463] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x138ea:0x0]// may get corrupted (rc -108) [21039.004466] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x138ec:0x0]// may get corrupted (rc -108) [21039.004476] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13931:0x0]// may get corrupted (rc -108) [21039.004477] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x138a1:0x0]// may get corrupted (rc -108) [21039.004479] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13933:0x0]// may get corrupted (rc -108) [21039.004481] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13934:0x0]// may get corrupted (rc -108) [21039.004483] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1389a:0x0]// may get corrupted (rc -108) [21039.004485] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x138f0:0x0]// may get corrupted (rc -108) [21039.004489] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf7b7:0x0]// may get corrupted (rc -108) [21039.004490] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x138ef:0x0]// may get corrupted (rc -108) [21039.004491] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13936:0x0]// may get corrupted (rc -108) [21039.004511] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf859:0x0]// may get corrupted (rc -108) [21039.004512] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf85a:0x0]// may get corrupted (rc -108) [21039.004524] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf85f:0x0]// may get corrupted (rc -108) [21039.004526] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf85b:0x0]// may get corrupted (rc -108) [21039.004527] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf863:0x0]// may get corrupted (rc -108) [21039.004528] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf866:0x0]// may get corrupted (rc -108) [21039.004530] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1389d:0x0]// may get corrupted (rc -108) [21039.004531] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd9d7:0x0]// may get corrupted (rc -108) [21039.004535] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd9ef:0x0]// may get corrupted (rc -108) [21039.004537] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf85c:0x0]// may get corrupted (rc -108) [21039.004538] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf838:0x0]// may get corrupted (rc -108) [21039.004566] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf932:0x0]// may get corrupted (rc -108) [21039.004567] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xda57:0x0]// may get corrupted (rc -108) [21039.004569] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xda3c:0x0]// may get corrupted (rc -108) [21039.004571] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf861:0x0]// may get corrupted (rc -108) [21039.004573] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xda58:0x0]// may get corrupted (rc -108) [21039.004575] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc4a5:0x0]// may get corrupted (rc -108) [21039.004577] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xbdf1:0x0]// may get corrupted (rc -108) [21039.004579] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf92f:0x0]// may get corrupted (rc -108) [21039.004586] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf943:0x0]// may get corrupted (rc -108) [21039.004596] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfa59:0x0]// may get corrupted (rc -108) [21039.004598] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcc08:0x0]// may get corrupted (rc -108) [21039.004607] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xda5f:0x0]// may get corrupted (rc -108) [21039.004612] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc128:0x0]// may get corrupted (rc -108) [21039.004614] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf76f:0x0]// may get corrupted (rc -108) [21039.004615] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfa46:0x0]// may get corrupted (rc -108) [21039.004616] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcc27:0x0]// may get corrupted (rc -108) [21039.004619] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xdad6:0x0]// may get corrupted (rc -108) [21039.004620] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xdb24:0x0]// may get corrupted (rc -108) [21039.004622] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc102:0x0]// may get corrupted (rc -108) [21039.004626] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xdaf6:0x0]// may get corrupted (rc -108) [21039.004638] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfb95:0x0]// may get corrupted (rc -108) [21039.004642] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc13a:0x0]// may get corrupted (rc -108) [21039.004645] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfbc6:0x0]// may get corrupted (rc -108) [21039.004647] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf905:0x0]// may get corrupted (rc -108) [21039.004650] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcc0f:0x0]// may get corrupted (rc -108) [21039.004655] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfc5f:0x0]// may get corrupted (rc -108) [21039.004656] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xdbed:0x0]// may get corrupted (rc -108) [21039.004658] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfa60:0x0]// may get corrupted (rc -108) [21039.004675] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfa67:0x0]// may get corrupted (rc -108) [21039.004676] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xdb9a:0x0]// may get corrupted (rc -108) [21039.004678] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfd18:0x0]// may get corrupted (rc -108) [21039.004679] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfd1f:0x0]// may get corrupted (rc -108) [21039.004693] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xdd71:0x0]// may get corrupted (rc -108) [21039.004696] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xff27:0x0]// may get corrupted (rc -108) [21039.004707] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xffea:0x0]// may get corrupted (rc -108) [21039.004708] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfd1e:0x0]// may get corrupted (rc -108) [21039.004710] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcd63:0x0]// may get corrupted (rc -108) [21039.004711] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfc5d:0x0]// may get corrupted (rc -108) [21039.004713] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xffee:0x0]// may get corrupted (rc -108) [21039.004717] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xddd8:0x0]// may get corrupted (rc -108) [21039.004730] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10236:0x0]// may get corrupted (rc -108) [21039.004732] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10238:0x0]// may get corrupted (rc -108) [21039.004734] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc5bc:0x0]// may get corrupted (rc -108) [21039.004739] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xffdf:0x0]// may get corrupted (rc -108) [21039.004741] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xdf02:0x0]// may get corrupted (rc -108) [21039.004743] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xdefe:0x0]// may get corrupted (rc -108) [21039.004750] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1024d:0x0]// may get corrupted (rc -108) [21039.004751] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcdf7:0x0]// may get corrupted (rc -108) [21039.004753] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcdf6:0x0]// may get corrupted (rc -108) [21039.004755] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcd68:0x0]// may get corrupted (rc -108) [21039.004773] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1033a:0x0]// may get corrupted (rc -108) [21039.004775] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x103b1:0x0]// may get corrupted (rc -108) [21039.004776] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc5eb:0x0]// may get corrupted (rc -108) [21039.004781] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x103e4:0x0]// may get corrupted (rc -108) [21039.004784] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x103e5:0x0]// may get corrupted (rc -108) [21039.004786] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1033b:0x0]// may get corrupted (rc -108) [21039.004791] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xdafe:0x0]// may get corrupted (rc -108) [21039.004806] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe01b:0x0]// may get corrupted (rc -108) [21039.004808] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe015:0x0]// may get corrupted (rc -108) [21039.004810] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcc81:0x0]// may get corrupted (rc -108) [21039.004812] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x103c0:0x0]// may get corrupted (rc -108) [21039.004817] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10475:0x0]// may get corrupted (rc -108) [21039.004819] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc57b:0x0]// may get corrupted (rc -108) [21039.004820] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe011:0x0]// may get corrupted (rc -108) [21039.004821] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10438:0x0]// may get corrupted (rc -108) [21039.004836] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc613:0x0]// may get corrupted (rc -108) [21039.004838] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcea2:0x0]// may get corrupted (rc -108) [21039.004846] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10494:0x0]// may get corrupted (rc -108) [21039.004850] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcea6:0x0]// may get corrupted (rc -108) [21039.004851] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xdfbf:0x0]// may get corrupted (rc -108) [21039.004852] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1054a:0x0]// may get corrupted (rc -108) [21039.004855] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe066:0x0]// may get corrupted (rc -108) [21039.004861] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10493:0x0]// may get corrupted (rc -108) [21039.004880] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xced7:0x0]// may get corrupted (rc -108) [21039.004881] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1059a:0x0]// may get corrupted (rc -108) [21039.004882] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1047a:0x0]// may get corrupted (rc -108) [21039.004884] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10613:0x0]// may get corrupted (rc -108) [21039.004885] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe06b:0x0]// may get corrupted (rc -108) [21039.004888] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10611:0x0]// may get corrupted (rc -108) [21039.004893] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcea3:0x0]// may get corrupted (rc -108) [21039.004907] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10530:0x0]// may get corrupted (rc -108) [21039.004910] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10615:0x0]// may get corrupted (rc -108) [21039.004912] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe188:0x0]// may get corrupted (rc -108) [21039.004914] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe189:0x0]// may get corrupted (rc -108) [21039.004916] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcf11:0x0]// may get corrupted (rc -108) [21039.004917] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x105f4:0x0]// may get corrupted (rc -108) [21039.004919] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10705:0x0]// may get corrupted (rc -108) [21039.004922] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1059d:0x0]// may get corrupted (rc -108) [21039.004943] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10706:0x0]// may get corrupted (rc -108) [21039.004946] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10842:0x0]// may get corrupted (rc -108) [21039.004948] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe248:0x0]// may get corrupted (rc -108) [21039.004950] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcf0f:0x0]// may get corrupted (rc -108) [21039.004953] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x107de:0x0]// may get corrupted (rc -108) [21039.004956] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe27c:0x0]// may get corrupted (rc -108) [21039.004958] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc67b:0x0]// may get corrupted (rc -108) [21039.004970] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe2c7:0x0]// may get corrupted (rc -108) [21039.004975] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcf6f:0x0]// may get corrupted (rc -108) [21039.004978] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe067:0x0]// may get corrupted (rc -108) [21039.004979] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1090b:0x0]// may get corrupted (rc -108) [21039.004981] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe2c5:0x0]// may get corrupted (rc -108) [21039.004985] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe2f5:0x0]// may get corrupted (rc -108) [21039.004986] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc690:0x0]// may get corrupted (rc -108) [21039.005005] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd009:0x0]// may get corrupted (rc -108) [21039.005015] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc226:0x0]// may get corrupted (rc -108) [21039.005016] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd00c:0x0]// may get corrupted (rc -108) [21039.005018] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcef7:0x0]// may get corrupted (rc -108) [21039.005019] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe3c8:0x0]// may get corrupted (rc -108) [21039.005021] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x107e1:0x0]// may get corrupted (rc -108) [21039.005023] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc227:0x0]// may get corrupted (rc -108) [21039.005025] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc1ff:0x0]// may get corrupted (rc -108) [21039.005048] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10c25:0x0]// may get corrupted (rc -108) [21039.005050] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10495:0x0]// may get corrupted (rc -108) [21039.005053] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10995:0x0]// may get corrupted (rc -108) [21039.005054] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1090d:0x0]// may get corrupted (rc -108) [21039.005056] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10a2a:0x0]// may get corrupted (rc -108) [21039.005058] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10c4b:0x0]// may get corrupted (rc -108) [21039.005060] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10adb:0x0]// may get corrupted (rc -108) [21039.005062] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe410:0x0]// may get corrupted (rc -108) [21039.005073] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe447:0x0]// may get corrupted (rc -108) [21039.005085] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe45d:0x0]// may get corrupted (rc -108) [21039.005087] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10c2c:0x0]// may get corrupted (rc -108) [21039.005088] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xbe1b:0x0]// may get corrupted (rc -108) [21039.005089] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10cd2:0x0]// may get corrupted (rc -108) [21039.005091] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd055:0x0]// may get corrupted (rc -108) [21039.005092] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc235:0x0]// may get corrupted (rc -108) [21039.005093] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10c15:0x0]// may get corrupted (rc -108) [21039.005110] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10d48:0x0]// may get corrupted (rc -108) [21039.005114] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10d46:0x0]// may get corrupted (rc -108) [21039.005120] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe45f:0x0]// may get corrupted (rc -108) [21039.005122] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10d6e:0x0]// may get corrupted (rc -108) [21039.005125] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10d6d:0x0]// may get corrupted (rc -108) [21039.005126] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10cd3:0x0]// may get corrupted (rc -108) [21039.005128] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10d45:0x0]// may get corrupted (rc -108) [21039.005131] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10db3:0x0]// may get corrupted (rc -108) [21039.005133] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10db1:0x0]// may get corrupted (rc -108) [21039.005135] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10dba:0x0]// may get corrupted (rc -108) [21039.005137] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10d38:0x0]// may get corrupted (rc -108) [21039.005139] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10de4:0x0]// may get corrupted (rc -108) [21039.005149] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10ee3:0x0]// may get corrupted (rc -108) [21039.005151] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10ee9:0x0]// may get corrupted (rc -108) [21039.005156] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10eeb:0x0]// may get corrupted (rc -108) [21039.005159] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10c1f:0x0]// may get corrupted (rc -108) [21039.005161] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10eef:0x0]// may get corrupted (rc -108) [21039.005163] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10ef7:0x0]// may get corrupted (rc -108) [21039.005164] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10eed:0x0]// may get corrupted (rc -108) [21039.005166] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10efb:0x0]// may get corrupted (rc -108) [21039.005168] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10ef9:0x0]// may get corrupted (rc -108) [21039.005170] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10ef5:0x0]// may get corrupted (rc -108) [21039.005171] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10d3e:0x0]// may get corrupted (rc -108) [21039.005185] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1106f:0x0]// may get corrupted (rc -108) [21039.005186] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11070:0x0]// may get corrupted (rc -108) [21039.005188] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10e99:0x0]// may get corrupted (rc -108) [21039.005190] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10dbc:0x0]// may get corrupted (rc -108) [21039.005193] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11073:0x0]// may get corrupted (rc -108) [21039.005194] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11074:0x0]// may get corrupted (rc -108) [21039.005196] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11075:0x0]// may get corrupted (rc -108) [21039.005200] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x110d0:0x0]// may get corrupted (rc -108) [21039.005201] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1100f:0x0]// may get corrupted (rc -108) [21039.005202] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11072:0x0]// may get corrupted (rc -108) [21039.005218] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1116e:0x0]// may get corrupted (rc -108) [21039.005219] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10f84:0x0]// may get corrupted (rc -108) [21039.005221] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11179:0x0]// may get corrupted (rc -108) [21039.005222] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1115a:0x0]// may get corrupted (rc -108) [21039.005226] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1118f:0x0]// may get corrupted (rc -108) [21039.005227] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11196:0x0]// may get corrupted (rc -108) [21039.005228] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x111a3:0x0]// may get corrupted (rc -108) [21039.005231] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1107b:0x0]// may get corrupted (rc -108) [21039.005234] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe4df:0x0]// may get corrupted (rc -108) [21039.005238] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11010:0x0]// may get corrupted (rc -108) [21039.005239] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11159:0x0]// may get corrupted (rc -108) [21039.005245] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x111a2:0x0]// may get corrupted (rc -108) [21039.005259] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11175:0x0]// may get corrupted (rc -108) [21039.005260] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1129a:0x0]// may get corrupted (rc -108) [21039.005262] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe546:0x0]// may get corrupted (rc -108) [21039.005265] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11255:0x0]// may get corrupted (rc -108) [21039.005267] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1129f:0x0]// may get corrupted (rc -108) [21039.005268] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1124d:0x0]// may get corrupted (rc -108) [21039.005271] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe5b4:0x0]// may get corrupted (rc -108) [21039.005274] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11251:0x0]// may get corrupted (rc -108) [21039.005276] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x111a1:0x0]// may get corrupted (rc -108) [21039.005277] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11250:0x0]// may get corrupted (rc -108) [21039.005278] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1129d:0x0]// may get corrupted (rc -108) [21039.005280] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1129c:0x0]// may get corrupted (rc -108) [21039.005291] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc73a:0x0]// may get corrupted (rc -108) [21039.005306] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11333:0x0]// may get corrupted (rc -108) [21039.005308] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd12a:0x0]// may get corrupted (rc -108) [21039.005310] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1129e:0x0]// may get corrupted (rc -108) [21039.005312] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe639:0x0]// may get corrupted (rc -108) [21039.005315] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe637:0x0]// may get corrupted (rc -108) [21039.005317] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1135b:0x0]// may get corrupted (rc -108) [21039.005319] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11341:0x0]// may get corrupted (rc -108) [21039.005320] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe638:0x0]// may get corrupted (rc -108) [21039.005321] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe646:0x0]// may get corrupted (rc -108) [21039.005324] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1135d:0x0]// may get corrupted (rc -108) [21039.005325] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd13a:0x0]// may get corrupted (rc -108) [21039.005327] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11254:0x0]// may get corrupted (rc -108) [21039.005343] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc26e:0x0]// may get corrupted (rc -108) [21039.005345] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11438:0x0]// may get corrupted (rc -108) [21039.005347] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11358:0x0]// may get corrupted (rc -108) [21039.005349] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x138e8:0x0]// may get corrupted (rc -108) [21039.005350] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe6d8:0x0]// may get corrupted (rc -108) [21039.005352] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1143c:0x0]// may get corrupted (rc -108) [21039.005354] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc282:0x0]// may get corrupted (rc -108) [21039.005356] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1143d:0x0]// may get corrupted (rc -108) [21039.005357] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11435:0x0]// may get corrupted (rc -108) [21039.005358] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11443:0x0]// may get corrupted (rc -108) [21039.005373] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd19a:0x0]// may get corrupted (rc -108) [21039.005374] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd196:0x0]// may get corrupted (rc -108) [21039.005386] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe6b8:0x0]// may get corrupted (rc -108) [21039.005388] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc291:0x0]// may get corrupted (rc -108) [21039.005390] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe76f:0x0]// may get corrupted (rc -108) [21039.005391] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd1bd:0x0]// may get corrupted (rc -108) [21039.005393] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11584:0x0]// may get corrupted (rc -108) [21039.005394] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe761:0x0]// may get corrupted (rc -108) [21039.005395] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe7ae:0x0]// may get corrupted (rc -108) [21039.005397] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd1b5:0x0]// may get corrupted (rc -108) [21039.005398] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe767:0x0]// may get corrupted (rc -108) [21039.005401] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x115bc:0x0]// may get corrupted (rc -108) [21039.005403] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe768:0x0]// may get corrupted (rc -108) [21039.005423] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc78e:0x0]// may get corrupted (rc -108) [21039.005427] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11765:0x0]// may get corrupted (rc -108) [21039.005432] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11771:0x0]// may get corrupted (rc -108) [21039.005433] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe806:0x0]// may get corrupted (rc -108) [21039.005435] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11714:0x0]// may get corrupted (rc -108) [21039.005436] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11717:0x0]// may get corrupted (rc -108) [21039.005437] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11770:0x0]// may get corrupted (rc -108) [21039.005441] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1172a:0x0]// may get corrupted (rc -108) [21039.005443] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe801:0x0]// may get corrupted (rc -108) [21039.005455] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11845:0x0]// may get corrupted (rc -108) [21039.005457] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11850:0x0]// may get corrupted (rc -108) [21039.005465] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11856:0x0]// may get corrupted (rc -108) [21039.005471] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11854:0x0]// may get corrupted (rc -108) [21039.005473] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11772:0x0]// may get corrupted (rc -108) [21039.005474] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1143b:0x0]// may get corrupted (rc -108) [21039.005476] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe87e:0x0]// may get corrupted (rc -108) [21039.005477] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe872:0x0]// may get corrupted (rc -108) [21039.005495] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xbe2c:0x0]// may get corrupted (rc -108) [21039.005496] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe8d6:0x0]// may get corrupted (rc -108) [21039.005501] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc7fd:0x0]// may get corrupted (rc -108) [21039.005502] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11857:0x0]// may get corrupted (rc -108) [21039.005504] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe8d8:0x0]// may get corrupted (rc -108) [21039.005505] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x115c0:0x0]// may get corrupted (rc -108) [21039.005507] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe8d5:0x0]// may get corrupted (rc -108) [21039.005508] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe8d9:0x0]// may get corrupted (rc -108) [21039.005529] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe905:0x0]// may get corrupted (rc -108) [21039.005536] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe91b:0x0]// may get corrupted (rc -108) [21039.005538] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc7e1:0x0]// may get corrupted (rc -108) [21039.005539] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe91c:0x0]// may get corrupted (rc -108) [21039.005543] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc814:0x0]// may get corrupted (rc -108) [21039.005544] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11719:0x0]// may get corrupted (rc -108) [21039.005545] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc2c1:0x0]// may get corrupted (rc -108) [21039.005567] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11a44:0x0]// may get corrupted (rc -108) [21039.005569] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe94c:0x0]// may get corrupted (rc -108) [21039.005572] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11a4e:0x0]// may get corrupted (rc -108) [21039.005574] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd2a0:0x0]// may get corrupted (rc -108) [21039.005576] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc2b3:0x0]// may get corrupted (rc -108) [21039.005578] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11a50:0x0]// may get corrupted (rc -108) [21039.005585] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11a1f:0x0]// may get corrupted (rc -108) [21039.005595] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc031:0x0]// may get corrupted (rc -108) [21039.005600] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe9a1:0x0]// may get corrupted (rc -108) [21039.005606] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11b01:0x0]// may get corrupted (rc -108) [21039.005607] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11b00:0x0]// may get corrupted (rc -108) [21039.005609] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11aff:0x0]// may get corrupted (rc -108) [21039.005611] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11999:0x0]// may get corrupted (rc -108) [21039.005613] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc809:0x0]// may get corrupted (rc -108) [21039.005617] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe9a5:0x0]// may get corrupted (rc -108) [21039.005627] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11a5f:0x0]// may get corrupted (rc -108) [21039.005638] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11a42:0x0]// may get corrupted (rc -108) [21039.005640] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11ba1:0x0]// may get corrupted (rc -108) [21039.005641] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x138d1:0x0]// may get corrupted (rc -108) [21039.005643] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11af7:0x0]// may get corrupted (rc -108) [21039.005648] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11bc0:0x0]// may get corrupted (rc -108) [21039.005649] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11ba9:0x0]// may get corrupted (rc -108) [21039.005651] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11b9d:0x0]// may get corrupted (rc -108) [21039.005653] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11bb4:0x0]// may get corrupted (rc -108) [21039.005655] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11b9f:0x0]// may get corrupted (rc -108) [21039.005669] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11c35:0x0]// may get corrupted (rc -108) [21039.005671] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd344:0x0]// may get corrupted (rc -108) [21039.005676] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11bb2:0x0]// may get corrupted (rc -108) [21039.005687] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xea48:0x0]// may get corrupted (rc -108) [21039.005689] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11c83:0x0]// may get corrupted (rc -108) [21039.005690] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11c47:0x0]// may get corrupted (rc -108) [21039.005692] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xbdaf:0x0]// may get corrupted (rc -108) [21039.005695] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11c45:0x0]// may get corrupted (rc -108) [21039.005699] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11c43:0x0]// may get corrupted (rc -108) [21039.005708] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11c86:0x0]// may get corrupted (rc -108) [21039.005719] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11c87:0x0]// may get corrupted (rc -108) [21039.005721] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x138ce:0x0]// may get corrupted (rc -108) [21039.005722] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd28d:0x0]// may get corrupted (rc -108) [21039.005724] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11ca0:0x0]// may get corrupted (rc -108) [21039.005726] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11ca1:0x0]// may get corrupted (rc -108) [21039.005746] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xea82:0x0]// may get corrupted (rc -108) [21039.005747] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11a4f:0x0]// may get corrupted (rc -108) [21039.005751] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11d1d:0x0]// may get corrupted (rc -108) [21039.005753] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xeaaa:0x0]// may get corrupted (rc -108) [21039.005754] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xeaab:0x0]// may get corrupted (rc -108) [21039.005756] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11ca6:0x0]// may get corrupted (rc -108) [21039.005757] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc043:0x0]// may get corrupted (rc -108) [21039.005758] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11ba0:0x0]// may get corrupted (rc -108) [21039.005759] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xeab5:0x0]// may get corrupted (rc -108) [21039.005761] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xea81:0x0]// may get corrupted (rc -108) [21039.005764] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11cbc:0x0]// may get corrupted (rc -108) [21039.005787] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xeb59:0x0]// may get corrupted (rc -108) [21039.005790] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xbe36:0x0]// may get corrupted (rc -108) [21039.005791] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd3b2:0x0]// may get corrupted (rc -108) [21039.005793] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11da0:0x0]// may get corrupted (rc -108) [21039.005795] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11e42:0x0]// may get corrupted (rc -108) [21039.005796] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11e2d:0x0]// may get corrupted (rc -108) [21039.005797] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xea5c:0x0]// may get corrupted (rc -108) [21039.005799] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11d9f:0x0]// may get corrupted (rc -108) [21039.005801] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11e28:0x0]// may get corrupted (rc -108) [21039.005820] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xeb67:0x0]// may get corrupted (rc -108) [21039.005821] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xeb58:0x0]// may get corrupted (rc -108) [21039.005822] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd3ba:0x0]// may get corrupted (rc -108) [21039.005825] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd3bf:0x0]// may get corrupted (rc -108) [21039.005827] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11d1c:0x0]// may get corrupted (rc -108) [21039.005830] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc040:0x0]// may get corrupted (rc -108) [21039.005831] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xeb70:0x0]// may get corrupted (rc -108) [21039.005851] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11ca4:0x0]// may get corrupted (rc -108) [21039.005854] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xebc5:0x0]// may get corrupted (rc -108) [21039.005856] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11f3c:0x0]// may get corrupted (rc -108) [21039.005858] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xec17:0x0]// may get corrupted (rc -108) [21039.005859] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11e2e:0x0]// may get corrupted (rc -108) [21039.005861] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11fd5:0x0]// may get corrupted (rc -108) [21039.005863] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11fd9:0x0]// may get corrupted (rc -108) [21039.005865] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc313:0x0]// may get corrupted (rc -108) [21039.005884] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd3e5:0x0]// may get corrupted (rc -108) [21039.005890] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12024:0x0]// may get corrupted (rc -108) [21039.005894] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xec2f:0x0]// may get corrupted (rc -108) [21039.005897] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd3ec:0x0]// may get corrupted (rc -108) [21039.005898] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xebe5:0x0]// may get corrupted (rc -108) [21039.005904] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xec2e:0x0]// may get corrupted (rc -108) [21039.005924] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x120d0:0x0]// may get corrupted (rc -108) [21039.005925] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x120cf:0x0]// may get corrupted (rc -108) [21039.005927] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xec32:0x0]// may get corrupted (rc -108) [21039.005928] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11d19:0x0]// may get corrupted (rc -108) [21039.005930] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x120d5:0x0]// may get corrupted (rc -108) [21039.005931] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x120ce:0x0]// may get corrupted (rc -108) [21039.005933] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd400:0x0]// may get corrupted (rc -108) [21039.005934] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11e29:0x0]// may get corrupted (rc -108) [21039.005956] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xecaa:0x0]// may get corrupted (rc -108) [21039.005960] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x121a8:0x0]// may get corrupted (rc -108) [21039.005962] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xec7c:0x0]// may get corrupted (rc -108) [21039.005965] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x121ab:0x0]// may get corrupted (rc -108) [21039.005966] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc05a:0x0]// may get corrupted (rc -108) [21039.005968] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd42e:0x0]// may get corrupted (rc -108) [21039.005970] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xec5a:0x0]// may get corrupted (rc -108) [21039.005971] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xeb72:0x0]// may get corrupted (rc -108) [21039.005973] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xecac:0x0]// may get corrupted (rc -108) [21039.005975] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x121c1:0x0]// may get corrupted (rc -108) [21039.005987] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12283:0x0]// may get corrupted (rc -108) [21039.005998] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12033:0x0]// may get corrupted (rc -108) [21039.006004] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x122e3:0x0]// may get corrupted (rc -108) [21039.006015] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd465:0x0]// may get corrupted (rc -108) [21039.006016] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xed19:0x0]// may get corrupted (rc -108) [21039.006018] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x122ef:0x0]// may get corrupted (rc -108) [21039.006021] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd430:0x0]// may get corrupted (rc -108) [21039.006022] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc061:0x0]// may get corrupted (rc -108) [21039.006023] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x122ec:0x0]// may get corrupted (rc -108) [21039.006025] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd472:0x0]// may get corrupted (rc -108) [21039.006038] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xed80:0x0]// may get corrupted (rc -108) [21039.006045] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc34c:0x0]// may get corrupted (rc -108) [21039.006046] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc8fb:0x0]// may get corrupted (rc -108) [21039.006049] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xed86:0x0]// may get corrupted (rc -108) [21039.006053] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x122e7:0x0]// may get corrupted (rc -108) [21039.006054] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x123a0:0x0]// may get corrupted (rc -108) [21039.006064] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12402:0x0]// may get corrupted (rc -108) [21039.006066] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xedcd:0x0]// may get corrupted (rc -108) [21039.006078] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xedd0:0x0]// may get corrupted (rc -108) [21039.006081] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc06e:0x0]// may get corrupted (rc -108) [21039.006082] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12151:0x0]// may get corrupted (rc -108) [21039.006084] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd4d1:0x0]// may get corrupted (rc -108) [21039.006086] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1239f:0x0]// may get corrupted (rc -108) [21039.006087] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc92b:0x0]// may get corrupted (rc -108) [21039.006089] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xedd1:0x0]// may get corrupted (rc -108) [21039.006090] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x122e6:0x0]// may get corrupted (rc -108) [21039.006109] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xee16:0x0]// may get corrupted (rc -108) [21039.006116] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd4e1:0x0]// may get corrupted (rc -108) [21039.006117] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1239e:0x0]// may get corrupted (rc -108) [21039.006118] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc078:0x0]// may get corrupted (rc -108) [21039.006119] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xee40:0x0]// may get corrupted (rc -108) [21039.006120] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x123a2:0x0]// may get corrupted (rc -108) [21039.006122] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd4f7:0x0]// may get corrupted (rc -108) [21039.006124] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12462:0x0]// may get corrupted (rc -108) [21039.006126] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xee59:0x0]// may get corrupted (rc -108) [21039.006127] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x124f2:0x0]// may get corrupted (rc -108) [21039.006153] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x125ea:0x0]// may get corrupted (rc -108) [21039.006154] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc956:0x0]// may get corrupted (rc -108) [21039.006156] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x125ab:0x0]// may get corrupted (rc -108) [21039.006157] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x125eb:0x0]// may get corrupted (rc -108) [21039.006160] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xee15:0x0]// may get corrupted (rc -108) [21039.006162] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc372:0x0]// may get corrupted (rc -108) [21039.006163] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xbf00:0x0]// may get corrupted (rc -108) [21039.006165] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xee08:0x0]// may get corrupted (rc -108) [21039.006166] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x125ad:0x0]// may get corrupted (rc -108) [21039.006168] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd528:0x0]// may get corrupted (rc -108) [21039.006186] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x126c0:0x0]// may get corrupted (rc -108) [21039.006187] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x126c1:0x0]// may get corrupted (rc -108) [21039.006190] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xeea9:0x0]// may get corrupted (rc -108) [21039.006191] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd56c:0x0]// may get corrupted (rc -108) [21039.006192] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x125e1:0x0]// may get corrupted (rc -108) [21039.006194] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc981:0x0]// may get corrupted (rc -108) [21039.006196] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x126c2:0x0]// may get corrupted (rc -108) [21039.006199] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc361:0x0]// may get corrupted (rc -108) [21039.006201] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x126c3:0x0]// may get corrupted (rc -108) [21039.006215] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xef50:0x0]// may get corrupted (rc -108) [21039.006221] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xee99:0x0]// may get corrupted (rc -108) [21039.006222] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x127e6:0x0]// may get corrupted (rc -108) [21039.006227] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x127e8:0x0]// may get corrupted (rc -108) [21039.006228] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd5a1:0x0]// may get corrupted (rc -108) [21039.006231] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xef12:0x0]// may get corrupted (rc -108) [21039.006233] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xef99:0x0]// may get corrupted (rc -108) [21039.006235] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xefa3:0x0]// may get corrupted (rc -108) [21039.006236] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd57f:0x0]// may get corrupted (rc -108) [21039.006255] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x126c4:0x0]// may get corrupted (rc -108) [21039.006257] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x128f8:0x0]// may get corrupted (rc -108) [21039.006259] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x128aa:0x0]// may get corrupted (rc -108) [21039.006261] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xefa2:0x0]// may get corrupted (rc -108) [21039.006262] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc9a6:0x0]// may get corrupted (rc -108) [21039.006265] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf020:0x0]// may get corrupted (rc -108) [21039.006267] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf000:0x0]// may get corrupted (rc -108) [21039.006270] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x128f3:0x0]// may get corrupted (rc -108) [21039.006290] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf07b:0x0]// may get corrupted (rc -108) [21039.006293] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd5ad:0x0]// may get corrupted (rc -108) [21039.006295] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf06f:0x0]// may get corrupted (rc -108) [21039.006297] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1299e:0x0]// may get corrupted (rc -108) [21039.006299] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf06c:0x0]// may get corrupted (rc -108) [21039.006300] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x128a7:0x0]// may get corrupted (rc -108) [21039.006302] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf07d:0x0]// may get corrupted (rc -108) [21039.006304] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12999:0x0]// may get corrupted (rc -108) [21039.006307] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf06e:0x0]// may get corrupted (rc -108) [21039.006308] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf07c:0x0]// may get corrupted (rc -108) [21039.006323] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12a14:0x0]// may get corrupted (rc -108) [21039.006324] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12a16:0x0]// may get corrupted (rc -108) [21039.006330] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf070:0x0]// may get corrupted (rc -108) [21039.006331] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x128d6:0x0]// may get corrupted (rc -108) [21039.006336] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf06b:0x0]// may get corrupted (rc -108) [21039.006338] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12a44:0x0]// may get corrupted (rc -108) [21039.006339] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1299c:0x0]// may get corrupted (rc -108) [21039.006341] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12a60:0x0]// may get corrupted (rc -108) [21039.006343] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12a47:0x0]// may get corrupted (rc -108) [21039.006364] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf16a:0x0]// may get corrupted (rc -108) [21039.006366] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12a46:0x0]// may get corrupted (rc -108) [21039.006368] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc3b0:0x0]// may get corrupted (rc -108) [21039.006369] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc9e8:0x0]// may get corrupted (rc -108) [21039.006370] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12aec:0x0]// may get corrupted (rc -108) [21039.006372] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12b42:0x0]// may get corrupted (rc -108) [21039.006374] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd675:0x0]// may get corrupted (rc -108) [21039.006376] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd690:0x0]// may get corrupted (rc -108) [21039.006378] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf169:0x0]// may get corrupted (rc -108) [21039.006380] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12a48:0x0]// may get corrupted (rc -108) [21039.006404] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc09f:0x0]// may get corrupted (rc -108) [21039.006406] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd69c:0x0]// may get corrupted (rc -108) [21039.006407] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12cde:0x0]// may get corrupted (rc -108) [21039.006409] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12be5:0x0]// may get corrupted (rc -108) [21039.006410] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf1e7:0x0]// may get corrupted (rc -108) [21039.006414] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd655:0x0]// may get corrupted (rc -108) [21039.006415] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf1e2:0x0]// may get corrupted (rc -108) [21039.006417] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd649:0x0]// may get corrupted (rc -108) [21039.006419] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd6c6:0x0]// may get corrupted (rc -108) [21039.006420] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf1e1:0x0]// may get corrupted (rc -108) [21039.006433] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf2b5:0x0]// may get corrupted (rc -108) [21039.006445] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12b3c:0x0]// may get corrupted (rc -108) [21039.006447] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12dd2:0x0]// may get corrupted (rc -108) [21039.006450] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf2ef:0x0]// may get corrupted (rc -108) [21039.006451] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12ce0:0x0]// may get corrupted (rc -108) [21039.006455] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc3d5:0x0]// may get corrupted (rc -108) [21039.006456] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd717:0x0]// may get corrupted (rc -108) [21039.006458] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf2ca:0x0]// may get corrupted (rc -108) [21039.006460] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12ded:0x0]// may get corrupted (rc -108) [21039.006461] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12e95:0x0]// may get corrupted (rc -108) [21039.006464] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12be4:0x0]// may get corrupted (rc -108) [21039.006480] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf373:0x0]// may get corrupted (rc -108) [21039.006484] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc0ac:0x0]// may get corrupted (rc -108) [21039.006491] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1301b:0x0]// may get corrupted (rc -108) [21039.006492] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf399:0x0]// may get corrupted (rc -108) [21039.006493] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc0b4:0x0]// may get corrupted (rc -108) [21039.006494] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13009:0x0]// may get corrupted (rc -108) [21039.006496] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12e93:0x0]// may get corrupted (rc -108) [21039.006497] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1301f:0x0]// may get corrupted (rc -108) [21039.006501] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc3e4:0x0]// may get corrupted (rc -108) [21039.006503] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1301e:0x0]// may get corrupted (rc -108) [21039.006521] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x130c5:0x0]// may get corrupted (rc -108) [21039.006524] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x130c4:0x0]// may get corrupted (rc -108) [21039.006525] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x130c0:0x0]// may get corrupted (rc -108) [21039.006527] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x130c7:0x0]// may get corrupted (rc -108) [21039.006528] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1301d:0x0]// may get corrupted (rc -108) [21039.006534] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x130f5:0x0]// may get corrupted (rc -108) [21039.006536] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x130c1:0x0]// may get corrupted (rc -108) [21039.006537] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x130ee:0x0]// may get corrupted (rc -108) [21039.006538] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf3f3:0x0]// may get corrupted (rc -108) [21039.006565] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf42a:0x0]// may get corrupted (rc -108) [21039.006567] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf429:0x0]// may get corrupted (rc -108) [21039.006569] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13181:0x0]// may get corrupted (rc -108) [21039.006571] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12be1:0x0]// may get corrupted (rc -108) [21039.006575] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13183:0x0]// may get corrupted (rc -108) [21039.006585] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13184:0x0]// may get corrupted (rc -108) [21039.006588] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x131b3:0x0]// may get corrupted (rc -108) [21039.006593] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x131b4:0x0]// may get corrupted (rc -108) [21039.006600] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf2c7:0x0]// may get corrupted (rc -108) [21039.006603] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd7b9:0x0]// may get corrupted (rc -108) [21039.006608] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1301a:0x0]// may get corrupted (rc -108) [21039.006609] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x131ee:0x0]// may get corrupted (rc -108) [21039.006610] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x131ed:0x0]// may get corrupted (rc -108) [21039.006612] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf472:0x0]// may get corrupted (rc -108) [21039.006613] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd7bb:0x0]// may get corrupted (rc -108) [21039.006616] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13180:0x0]// may get corrupted (rc -108) [21039.006635] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x130f1:0x0]// may get corrupted (rc -108) [21039.006637] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13247:0x0]// may get corrupted (rc -108) [21039.006642] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x131eb:0x0]// may get corrupted (rc -108) [21039.006644] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x131ea:0x0]// may get corrupted (rc -108) [21039.006646] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1324c:0x0]// may get corrupted (rc -108) [21039.006648] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x131b5:0x0]// may get corrupted (rc -108) [21039.006652] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf4a7:0x0]// may get corrupted (rc -108) [21039.006659] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1324a:0x0]// may get corrupted (rc -108) [21039.006667] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x132f0:0x0]// may get corrupted (rc -108) [21039.006668] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1324e:0x0]// may get corrupted (rc -108) [21039.006670] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1228b:0x0]// may get corrupted (rc -108) [21039.006671] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x132f2:0x0]// may get corrupted (rc -108) [21039.006673] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13263:0x0]// may get corrupted (rc -108) [21039.006676] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x131c9:0x0]// may get corrupted (rc -108) [21039.006677] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x132f3:0x0]// may get corrupted (rc -108) [21039.006687] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x133c3:0x0]// may get corrupted (rc -108) [21039.006691] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xbdee:0x0]// may get corrupted (rc -108) [21039.006701] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x133eb:0x0]// may get corrupted (rc -108) [21039.006705] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x133a8:0x0]// may get corrupted (rc -108) [21039.006707] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13250:0x0]// may get corrupted (rc -108) [21039.006708] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf52f:0x0]// may get corrupted (rc -108) [21039.006710] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf548:0x0]// may get corrupted (rc -108) [21039.006712] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd840:0x0]// may get corrupted (rc -108) [21039.006714] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcab4:0x0]// may get corrupted (rc -108) [21039.006717] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x133c6:0x0]// may get corrupted (rc -108) [21039.006733] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd88b:0x0]// may get corrupted (rc -108) [21039.006736] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x134cb:0x0]// may get corrupted (rc -108) [21039.006738] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf59d:0x0]// may get corrupted (rc -108) [21039.006740] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x132ef:0x0]// may get corrupted (rc -108) [21039.006744] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf530:0x0]// may get corrupted (rc -108) [21039.006747] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x134f6:0x0]// may get corrupted (rc -108) [21039.006748] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x134b0:0x0]// may get corrupted (rc -108) [21039.006761] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13551:0x0]// may get corrupted (rc -108) [21039.006762] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13531:0x0]// may get corrupted (rc -108) [21039.006770] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13554:0x0]// may get corrupted (rc -108) [21039.006773] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf531:0x0]// may get corrupted (rc -108) [21039.006775] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x134cd:0x0]// may get corrupted (rc -108) [21039.006776] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcadb:0x0]// may get corrupted (rc -108) [21039.006777] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x134cf:0x0]// may get corrupted (rc -108) [21039.006778] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf5e0:0x0]// may get corrupted (rc -108) [21039.006782] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13538:0x0]// may get corrupted (rc -108) [21039.006785] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13557:0x0]// may get corrupted (rc -108) [21039.006805] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x135ff:0x0]// may get corrupted (rc -108) [21039.006806] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x135f2:0x0]// may get corrupted (rc -108) [21039.006808] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x135ee:0x0]// may get corrupted (rc -108) [21039.006810] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc0dd:0x0]// may get corrupted (rc -108) [21039.006813] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd8c8:0x0]// may get corrupted (rc -108) [21039.006815] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13650:0x0]// may get corrupted (rc -108) [21039.006817] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf645:0x0]// may get corrupted (rc -108) [21039.006818] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x135f1:0x0]// may get corrupted (rc -108) [21039.006820] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x135ef:0x0]// may get corrupted (rc -108) [21039.006821] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.006823] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcb02:0x0]// may get corrupted (rc -108) [21039.006836] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd8fb:0x0]// may get corrupted (rc -108) [21039.006847] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd906:0x0]// may get corrupted (rc -108) [21039.006849] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x136f1:0x0]// may get corrupted (rc -108) [21039.006851] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x136ee:0x0]// may get corrupted (rc -108) [21039.006853] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x136ce:0x0]// may get corrupted (rc -108) [21039.006854] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd8fd:0x0]// may get corrupted (rc -108) [21039.006859] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x136cd:0x0]// may get corrupted (rc -108) [21039.006874] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf683:0x0]// may get corrupted (rc -108) [21039.006875] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x137c7:0x0]// may get corrupted (rc -108) [21039.006876] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1382a:0x0]// may get corrupted (rc -108) [21039.006878] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x132f1:0x0]// may get corrupted (rc -108) [21039.006879] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcb38:0x0]// may get corrupted (rc -108) [21039.006883] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf72b:0x0]// may get corrupted (rc -108) [21039.006898] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf77f:0x0]// may get corrupted (rc -108) [21039.006905] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf815:0x0]// may get corrupted (rc -108) [21039.006908] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x133ec:0x0]// may get corrupted (rc -108) [21039.006909] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd977:0x0]// may get corrupted (rc -108) [21039.006911] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd9c8:0x0]// may get corrupted (rc -108) [21039.006917] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcb4c:0x0]// may get corrupted (rc -108) [21039.006920] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf81b:0x0]// may get corrupted (rc -108) [21039.006933] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xda1f:0x0]// may get corrupted (rc -108) [21039.006934] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xda1b:0x0]// may get corrupted (rc -108) [21039.006943] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf8c6:0x0]// may get corrupted (rc -108) [21039.006944] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13552:0x0]// may get corrupted (rc -108) [21039.006946] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf8cc:0x0]// may get corrupted (rc -108) [21039.006957] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc497:0x0]// may get corrupted (rc -108) [21039.006966] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc4b1:0x0]// may get corrupted (rc -108) [21039.006967] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc0fb:0x0]// may get corrupted (rc -108) [21039.006969] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xda90:0x0]// may get corrupted (rc -108) [21039.006971] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf9a2:0x0]// may get corrupted (rc -108) [21039.006974] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcaf8:0x0]// may get corrupted (rc -108) [21039.006975] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xda25:0x0]// may get corrupted (rc -108) [21039.006995] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfaf9:0x0]// may get corrupted (rc -108) [21039.006997] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfb05:0x0]// may get corrupted (rc -108) [21039.007000] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc133:0x0]// may get corrupted (rc -108) [21039.007001] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf696:0x0]// may get corrupted (rc -108) [21039.007002] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfafb:0x0]// may get corrupted (rc -108) [21039.007009] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xdb41:0x0]// may get corrupted (rc -108) [21039.007013] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf818:0x0]// may get corrupted (rc -108) [21039.007025] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xdc4a:0x0]// may get corrupted (rc -108) [21039.007026] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfcfc:0x0]// may get corrupted (rc -108) [21039.007029] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfb6f:0x0]// may get corrupted (rc -108) [21039.007030] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcbdf:0x0]// may get corrupted (rc -108) [21039.007035] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfe41:0x0]// may get corrupted (rc -108) [21039.007037] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xdd5c:0x0]// may get corrupted (rc -108) [21039.007039] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfb71:0x0]// may get corrupted (rc -108) [21039.007040] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xbe77:0x0]// may get corrupted (rc -108) [21039.007050] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xde3c:0x0]// may get corrupted (rc -108) [21039.007052] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc590:0x0]// may get corrupted (rc -108) [21039.007058] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcb75:0x0]// may get corrupted (rc -108) [21039.007065] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcdf8:0x0]// may get corrupted (rc -108) [21039.007067] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x101fe:0x0]// may get corrupted (rc -108) [21039.007068] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xda93:0x0]// may get corrupted (rc -108) [21039.007070] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcdb1:0x0]// may get corrupted (rc -108) [21039.007072] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10234:0x0]// may get corrupted (rc -108) [21039.007073] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.007075] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfd0c:0x0]// may get corrupted (rc -108) [21039.007090] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x102d4:0x0]// may get corrupted (rc -108) [21039.007093] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xce6f:0x0]// may get corrupted (rc -108) [21039.007095] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1025e:0x0]// may get corrupted (rc -108) [21039.007099] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xdfec:0x0]// may get corrupted (rc -108) [21039.007100] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10437:0x0]// may get corrupted (rc -108) [21039.007103] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfb6d:0x0]// may get corrupted (rc -108) [21039.007104] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1040c:0x0]// may get corrupted (rc -108) [21039.007106] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x102d6:0x0]// may get corrupted (rc -108) [21039.007126] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x101e7:0x0]// may get corrupted (rc -108) [21039.007128] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcda3:0x0]// may get corrupted (rc -108) [21039.007129] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe098:0x0]// may get corrupted (rc -108) [21039.007131] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1058d:0x0]// may get corrupted (rc -108) [21039.007132] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x104a1:0x0]// may get corrupted (rc -108) [21039.007134] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe094:0x0]// may get corrupted (rc -108) [21039.007136] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe097:0x0]// may get corrupted (rc -108) [21039.007140] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcebf:0x0]// may get corrupted (rc -108) [21039.007141] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe092:0x0]// may get corrupted (rc -108) [21039.007143] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1058c:0x0]// may get corrupted (rc -108) [21039.007145] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1059b:0x0]// may get corrupted (rc -108) [21039.007150] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xceb4:0x0]// may get corrupted (rc -108) [21039.007169] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xdff0:0x0]// may get corrupted (rc -108) [21039.007173] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe1f5:0x0]// may get corrupted (rc -108) [21039.007174] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1088c:0x0]// may get corrupted (rc -108) [21039.007177] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe093:0x0]// may get corrupted (rc -108) [21039.007178] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1088a:0x0]// may get corrupted (rc -108) [21039.007181] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcf2f:0x0]// may get corrupted (rc -108) [21039.007182] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1088e:0x0]// may get corrupted (rc -108) [21039.007185] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xdfee:0x0]// may get corrupted (rc -108) [21039.007187] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10888:0x0]// may get corrupted (rc -108) [21039.007188] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x107a0:0x0]// may get corrupted (rc -108) [21039.007198] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10a1e:0x0]// may get corrupted (rc -108) [21039.007202] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.007215] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe3d4:0x0]// may get corrupted (rc -108) [21039.007219] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe3ce:0x0]// may get corrupted (rc -108) [21039.007225] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10a29:0x0]// may get corrupted (rc -108) [21039.007226] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10bbd:0x0]// may get corrupted (rc -108) [21039.007228] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x107dd:0x0]// may get corrupted (rc -108) [21039.007230] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe3d2:0x0]// may get corrupted (rc -108) [21039.007232] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10bba:0x0]// may get corrupted (rc -108) [21039.007233] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe3d6:0x0]// may get corrupted (rc -108) [21039.007235] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10b12:0x0]// may get corrupted (rc -108) [21039.007240] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc22c:0x0]// may get corrupted (rc -108) [21039.007242] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe3cf:0x0]// may get corrupted (rc -108) [21039.007261] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10de6:0x0]// may get corrupted (rc -108) [21039.007263] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10dbe:0x0]// may get corrupted (rc -108) [21039.007266] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10e4a:0x0]// may get corrupted (rc -108) [21039.007275] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10e51:0x0]// may get corrupted (rc -108) [21039.007277] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10d34:0x0]// may get corrupted (rc -108) [21039.007278] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10ce6:0x0]// may get corrupted (rc -108) [21039.007280] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcf30:0x0]// may get corrupted (rc -108) [21039.007283] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10e4d:0x0]// may get corrupted (rc -108) [21039.007285] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc65b:0x0]// may get corrupted (rc -108) [21039.007304] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10fba:0x0]// may get corrupted (rc -108) [21039.007310] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10ba0:0x0]// may get corrupted (rc -108) [21039.007312] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe3d0:0x0]// may get corrupted (rc -108) [21039.007314] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10fb6:0x0]// may get corrupted (rc -108) [21039.007316] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11013:0x0]// may get corrupted (rc -108) [21039.007318] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10e4f:0x0]// may get corrupted (rc -108) [21039.007319] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10d36:0x0]// may get corrupted (rc -108) [21039.007321] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10de8:0x0]// may get corrupted (rc -108) [21039.007331] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.007338] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc24c:0x0]// may get corrupted (rc -108) [21039.007351] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1100b:0x0]// may get corrupted (rc -108) [21039.007352] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe57c:0x0]// may get corrupted (rc -108) [21039.007353] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe504:0x0]// may get corrupted (rc -108) [21039.007354] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11158:0x0]// may get corrupted (rc -108) [21039.007356] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10fbc:0x0]// may get corrupted (rc -108) [21039.007360] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc731:0x0]// may get corrupted (rc -108) [21039.007361] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe524:0x0]// may get corrupted (rc -108) [21039.007363] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd0b6:0x0]// may get corrupted (rc -108) [21039.007383] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x112e4:0x0]// may get corrupted (rc -108) [21039.007384] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc269:0x0]// may get corrupted (rc -108) [21039.007386] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x111de:0x0]// may get corrupted (rc -108) [21039.007388] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x112a0:0x0]// may get corrupted (rc -108) [21039.007389] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe61c:0x0]// may get corrupted (rc -108) [21039.007392] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xbebe:0x0]// may get corrupted (rc -108) [21039.007394] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd11d:0x0]// may get corrupted (rc -108) [21039.007396] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10fbf:0x0]// may get corrupted (rc -108) [21039.007397] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10fc0:0x0]// may get corrupted (rc -108) [21039.007399] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x112f1:0x0]// may get corrupted (rc -108) [21039.007401] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe621:0x0]// may get corrupted (rc -108) [21039.007421] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd0d3:0x0]// may get corrupted (rc -108) [21039.007423] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd11b:0x0]// may get corrupted (rc -108) [21039.007425] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11433:0x0]// may get corrupted (rc -108) [21039.007426] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x112a1:0x0]// may get corrupted (rc -108) [21039.007427] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xbfff:0x0]// may get corrupted (rc -108) [21039.007430] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x114fb:0x0]// may get corrupted (rc -108) [21039.007433] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe69e:0x0]// may get corrupted (rc -108) [21039.007436] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd157:0x0]// may get corrupted (rc -108) [21039.007437] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe707:0x0]// may get corrupted (rc -108) [21039.007450] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe7b8:0x0]// may get corrupted (rc -108) [21039.007451] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe7b6:0x0]// may get corrupted (rc -108) [21039.007452] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.007458] Lustre: 112017:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc29e:0x0]// may get corrupted (rc -108) [21039.007460] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe699:0x0]// may get corrupted (rc -108) [21039.007475] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x116fb:0x0]// may get corrupted (rc -108) [21039.007476] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd187:0x0]// may get corrupted (rc -108) [21039.007478] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x113fa:0x0]// may get corrupted (rc -108) [21039.007479] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x116fa:0x0]// may get corrupted (rc -108) [21039.007481] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe7b9:0x0]// may get corrupted (rc -108) [21039.007482] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11794:0x0]// may get corrupted (rc -108) [21039.007506] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd244:0x0]// may get corrupted (rc -108) [21039.007507] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe7ba:0x0]// may get corrupted (rc -108) [21039.007509] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1179f:0x0]// may get corrupted (rc -108) [21039.007511] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x117a5:0x0]// may get corrupted (rc -108) [21039.007512] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe83c:0x0]// may get corrupted (rc -108) [21039.007515] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe880:0x0]// may get corrupted (rc -108) [21039.007518] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc7d2:0x0]// may get corrupted (rc -108) [21039.007523] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1183d:0x0]// may get corrupted (rc -108) [21039.007533] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11941:0x0]// may get corrupted (rc -108) [21039.007534] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc808:0x0]// may get corrupted (rc -108) [21039.007539] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11942:0x0]// may get corrupted (rc -108) [21039.007568] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe901:0x0]// may get corrupted (rc -108) [21039.007572] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x119c8:0x0]// may get corrupted (rc -108) [21039.007574] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x119ca:0x0]// may get corrupted (rc -108) [21039.007576] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11969:0x0]// may get corrupted (rc -108) [21039.007578] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11a76:0x0]// may get corrupted (rc -108) [21039.007580] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe900:0x0]// may get corrupted (rc -108) [21039.007581] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1199e:0x0]// may get corrupted (rc -108) [21039.007583] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe97c:0x0]// may get corrupted (rc -108) [21039.007585] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe69b:0x0]// may get corrupted (rc -108) [21039.007587] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x118d9:0x0]// may get corrupted (rc -108) [21039.007588] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.007604] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xea17:0x0]// may get corrupted (rc -108) [21039.007607] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11be4:0x0]// may get corrupted (rc -108) [21039.007614] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe7b7:0x0]// may get corrupted (rc -108) [21039.007615] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11c0b:0x0]// may get corrupted (rc -108) [21039.007618] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11a25:0x0]// may get corrupted (rc -108) [21039.007620] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11be5:0x0]// may get corrupted (rc -108) [21039.007621] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11c03:0x0]// may get corrupted (rc -108) [21039.007623] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11be6:0x0]// may get corrupted (rc -108) [21039.007627] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11c05:0x0]// may get corrupted (rc -108) [21039.007641] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xeaf5:0x0]// may get corrupted (rc -108) [21039.007645] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xeae7:0x0]// may get corrupted (rc -108) [21039.007648] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xeaf9:0x0]// may get corrupted (rc -108) [21039.007652] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11d71:0x0]// may get corrupted (rc -108) [21039.007654] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11d9e:0x0]// may get corrupted (rc -108) [21039.007657] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11e67:0x0]// may get corrupted (rc -108) [21039.007658] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11844:0x0]// may get corrupted (rc -108) [21039.007660] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11e69:0x0]// may get corrupted (rc -108) [21039.007661] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11d99:0x0]// may get corrupted (rc -108) [21039.007663] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11d9d:0x0]// may get corrupted (rc -108) [21039.007681] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11f8a:0x0]// may get corrupted (rc -108) [21039.007687] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11a0b:0x0]// may get corrupted (rc -108) [21039.007688] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11f8b:0x0]// may get corrupted (rc -108) [21039.007690] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xebc6:0x0]// may get corrupted (rc -108) [21039.007693] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11f89:0x0]// may get corrupted (rc -108) [21039.007694] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xec1f:0x0]// may get corrupted (rc -108) [21039.007696] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11f98:0x0]// may get corrupted (rc -108) [21039.007701] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc31c:0x0]// may get corrupted (rc -108) [21039.007702] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11c07:0x0]// may get corrupted (rc -108) [21039.007718] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.007719] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12101:0x0]// may get corrupted (rc -108) [21039.007722] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1211c:0x0]// may get corrupted (rc -108) [21039.007726] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xeaf8:0x0]// may get corrupted (rc -108) [21039.007728] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd412:0x0]// may get corrupted (rc -108) [21039.007730] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11f86:0x0]// may get corrupted (rc -108) [21039.007732] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1211e:0x0]// may get corrupted (rc -108) [21039.007734] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12117:0x0]// may get corrupted (rc -108) [21039.007748] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd45a:0x0]// may get corrupted (rc -108) [21039.007757] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12232:0x0]// may get corrupted (rc -108) [21039.007759] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12100:0x0]// may get corrupted (rc -108) [21039.007763] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc33d:0x0]// may get corrupted (rc -108) [21039.007764] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd461:0x0]// may get corrupted (rc -108) [21039.007765] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xec18:0x0]// may get corrupted (rc -108) [21039.007779] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc34b:0x0]// may get corrupted (rc -108) [21039.007788] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd4a8:0x0]// may get corrupted (rc -108) [21039.007790] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12401:0x0]// may get corrupted (rc -108) [21039.007794] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12405:0x0]// may get corrupted (rc -108) [21039.007796] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12404:0x0]// may get corrupted (rc -108) [21039.007797] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12270:0x0]// may get corrupted (rc -108) [21039.007799] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x123a7:0x0]// may get corrupted (rc -108) [21039.007801] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11c06:0x0]// may get corrupted (rc -108) [21039.007826] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12540:0x0]// may get corrupted (rc -108) [21039.007827] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xee06:0x0]// may get corrupted (rc -108) [21039.007829] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd4ed:0x0]// may get corrupted (rc -108) [21039.007831] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12541:0x0]// may get corrupted (rc -108) [21039.007833] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11f88:0x0]// may get corrupted (rc -108) [21039.007834] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12543:0x0]// may get corrupted (rc -108) [21039.007836] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1226c:0x0]// may get corrupted (rc -108) [21039.007844] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.007854] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xeeb2:0x0]// may get corrupted (rc -108) [21039.007864] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xee07:0x0]// may get corrupted (rc -108) [21039.007866] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12116:0x0]// may get corrupted (rc -108) [21039.007867] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12671:0x0]// may get corrupted (rc -108) [21039.007869] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12673:0x0]// may get corrupted (rc -108) [21039.007871] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12688:0x0]// may get corrupted (rc -108) [21039.007873] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xed79:0x0]// may get corrupted (rc -108) [21039.007875] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12611:0x0]// may get corrupted (rc -108) [21039.007877] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1268d:0x0]// may get corrupted (rc -108) [21039.007893] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12739:0x0]// may get corrupted (rc -108) [21039.007894] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12738:0x0]// may get corrupted (rc -108) [21039.007896] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc93a:0x0]// may get corrupted (rc -108) [21039.007905] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1273c:0x0]// may get corrupted (rc -108) [21039.007909] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12668:0x0]// may get corrupted (rc -108) [21039.007910] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1273e:0x0]// may get corrupted (rc -108) [21039.007912] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12544:0x0]// may get corrupted (rc -108) [21039.007924] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd5c8:0x0]// may get corrupted (rc -108) [21039.007935] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x128a6:0x0]// may get corrupted (rc -108) [21039.007936] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xefa4:0x0]// may get corrupted (rc -108) [21039.007938] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1273a:0x0]// may get corrupted (rc -108) [21039.007939] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x128f5:0x0]// may get corrupted (rc -108) [21039.007942] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x128f7:0x0]// may get corrupted (rc -108) [21039.007944] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd5cd:0x0]// may get corrupted (rc -108) [21039.007946] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xeec5:0x0]// may get corrupted (rc -108) [21039.007948] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12689:0x0]// may get corrupted (rc -108) [21039.007962] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.007966] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x137c6:0x0]// may get corrupted (rc -108) [21039.007967] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xbf0a:0x0]// may get corrupted (rc -108) [21039.007975] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x129ed:0x0]// may get corrupted (rc -108) [21039.007977] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1273b:0x0]// may get corrupted (rc -108) [21039.007979] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x128a5:0x0]// may get corrupted (rc -108) [21039.007981] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x129ec:0x0]// may get corrupted (rc -108) [21039.007983] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x129f1:0x0]// may get corrupted (rc -108) [21039.007985] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x129ee:0x0]// may get corrupted (rc -108) [21039.007987] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12918:0x0]// may get corrupted (rc -108) [21039.008000] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf147:0x0]// may get corrupted (rc -108) [21039.008006] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd662:0x0]// may get corrupted (rc -108) [21039.008014] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x129ea:0x0]// may get corrupted (rc -108) [21039.008015] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12b43:0x0]// may get corrupted (rc -108) [21039.008017] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x129e9:0x0]// may get corrupted (rc -108) [21039.008019] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf148:0x0]// may get corrupted (rc -108) [21039.008021] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf14a:0x0]// may get corrupted (rc -108) [21039.008023] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12b7f:0x0]// may get corrupted (rc -108) [21039.008025] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd663:0x0]// may get corrupted (rc -108) [21039.008027] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12b79:0x0]// may get corrupted (rc -108) [21039.008029] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x129e8:0x0]// may get corrupted (rc -108) [21039.008046] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12d5b:0x0]// may get corrupted (rc -108) [21039.008050] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12d63:0x0]// may get corrupted (rc -108) [21039.008052] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd661:0x0]// may get corrupted (rc -108) [21039.008054] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12b7c:0x0]// may get corrupted (rc -108) [21039.008056] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12b7d:0x0]// may get corrupted (rc -108) [21039.008058] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12d5d:0x0]// may get corrupted (rc -108) [21039.008060] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12f06:0x0]// may get corrupted (rc -108) [21039.008062] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd71a:0x0]// may get corrupted (rc -108) [21039.008063] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf34d:0x0]// may get corrupted (rc -108) [21039.008077] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.008079] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1308a:0x0]// may get corrupted (rc -108) [21039.008084] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12e96:0x0]// may get corrupted (rc -108) [21039.008085] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1308b:0x0]// may get corrupted (rc -108) [21039.008088] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12f63:0x0]// may get corrupted (rc -108) [21039.008089] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xca6d:0x0]// may get corrupted (rc -108) [21039.008091] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12f60:0x0]// may get corrupted (rc -108) [21039.008094] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd795:0x0]// may get corrupted (rc -108) [21039.008097] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1308c:0x0]// may get corrupted (rc -108) [21039.008099] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd720:0x0]// may get corrupted (rc -108) [21039.008100] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc3f9:0x0]// may get corrupted (rc -108) [21039.008117] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13218:0x0]// may get corrupted (rc -108) [21039.008119] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf492:0x0]// may get corrupted (rc -108) [21039.008122] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf486:0x0]// may get corrupted (rc -108) [21039.008124] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf3f4:0x0]// may get corrupted (rc -108) [21039.008126] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13089:0x0]// may get corrupted (rc -108) [21039.008128] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13129:0x0]// may get corrupted (rc -108) [21039.008130] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13217:0x0]// may get corrupted (rc -108) [21039.008131] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd7b2:0x0]// may get corrupted (rc -108) [21039.008133] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xca8f:0x0]// may get corrupted (rc -108) [21039.008134] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xca8c:0x0]// may get corrupted (rc -108) [21039.008157] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x133a5:0x0]// may get corrupted (rc -108) [21039.008159] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13314:0x0]// may get corrupted (rc -108) [21039.008161] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd84c:0x0]// may get corrupted (rc -108) [21039.008162] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13313:0x0]// may get corrupted (rc -108) [21039.008164] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf549:0x0]// may get corrupted (rc -108) [21039.008165] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf4a2:0x0]// may get corrupted (rc -108) [21039.008167] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf495:0x0]// may get corrupted (rc -108) [21039.008169] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x132f5:0x0]// may get corrupted (rc -108) [21039.008188] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd7ee:0x0]// may get corrupted (rc -108) [21039.008191] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc420:0x0]// may get corrupted (rc -108) [21039.008192] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x134fd:0x0]// may get corrupted (rc -108) [21039.008194] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x134ab:0x0]// may get corrupted (rc -108) [21039.008195] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x133a6:0x0]// may get corrupted (rc -108) [21039.008196] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.008198] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc0d7:0x0]// may get corrupted (rc -108) [21039.008200] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x134ff:0x0]// may get corrupted (rc -108) [21039.008202] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x132fb:0x0]// may get corrupted (rc -108) [21039.008204] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x134a4:0x0]// may get corrupted (rc -108) [21039.008205] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1351a:0x0]// may get corrupted (rc -108) [21039.008206] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.008223] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x137af:0x0]// may get corrupted (rc -108) [21039.008228] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x134fe:0x0]// may get corrupted (rc -108) [21039.008230] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf5eb:0x0]// may get corrupted (rc -108) [21039.008232] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf5ec:0x0]// may get corrupted (rc -108) [21039.008234] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x135e5:0x0]// may get corrupted (rc -108) [21039.008236] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13519:0x0]// may get corrupted (rc -108) [21039.008238] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1357e:0x0]// may get corrupted (rc -108) [21039.008239] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd8b2:0x0]// may get corrupted (rc -108) [21039.008257] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x136bd:0x0]// may get corrupted (rc -108) [21039.008258] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x136ca:0x0]// may get corrupted (rc -108) [21039.008261] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13502:0x0]// may get corrupted (rc -108) [21039.008262] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x135d4:0x0]// may get corrupted (rc -108) [21039.008264] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1369f:0x0]// may get corrupted (rc -108) [21039.008266] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xbf34:0x0]// may get corrupted (rc -108) [21039.008267] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13708:0x0]// may get corrupted (rc -108) [21039.008268] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13796:0x0]// may get corrupted (rc -108) [21039.008269] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x135e8:0x0]// may get corrupted (rc -108) [21039.008272] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x135e6:0x0]// may get corrupted (rc -108) [21039.008298] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcb39:0x0]// may get corrupted (rc -108) [21039.008301] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcb28:0x0]// may get corrupted (rc -108) [21039.008303] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x137ae:0x0]// may get corrupted (rc -108) [21039.008304] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf770:0x0]// may get corrupted (rc -108) [21039.008306] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd96f:0x0]// may get corrupted (rc -108) [21039.008308] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd935:0x0]// may get corrupted (rc -108) [21039.008310] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xca3a:0x0]// may get corrupted (rc -108) [21039.008312] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf6dd:0x0]// may get corrupted (rc -108) [21039.008313] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd976:0x0]// may get corrupted (rc -108) [21039.008316] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf662:0x0]// may get corrupted (rc -108) [21039.008330] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.008337] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf6f8:0x0]// may get corrupted (rc -108) [21039.008341] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf731:0x0]// may get corrupted (rc -108) [21039.008346] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf996:0x0]// may get corrupted (rc -108) [21039.008349] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf903:0x0]// may get corrupted (rc -108) [21039.008350] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc4d7:0x0]// may get corrupted (rc -108) [21039.008352] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfac1:0x0]// may get corrupted (rc -108) [21039.008353] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xbf49:0x0]// may get corrupted (rc -108) [21039.008355] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf99f:0x0]// may get corrupted (rc -108) [21039.008370] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfc61:0x0]// may get corrupted (rc -108) [21039.008375] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf6f3:0x0]// may get corrupted (rc -108) [21039.008382] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfa2c:0x0]// may get corrupted (rc -108) [21039.008385] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xbd93:0x0]// may get corrupted (rc -108) [21039.008387] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcbb6:0x0]// may get corrupted (rc -108) [21039.008388] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xdbf4:0x0]// may get corrupted (rc -108) [21039.008389] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10080:0x0]// may get corrupted (rc -108) [21039.008391] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xdc09:0x0]// may get corrupted (rc -108) [21039.008393] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xde15:0x0]// may get corrupted (rc -108) [21039.008395] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xdbf3:0x0]// may get corrupted (rc -108) [21039.008416] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x104d0:0x0]// may get corrupted (rc -108) [21039.008417] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1040b:0x0]// may get corrupted (rc -108) [21039.008418] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10083:0x0]// may get corrupted (rc -108) [21039.008419] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x104d9:0x0]// may get corrupted (rc -108) [21039.008421] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x104d8:0x0]// may get corrupted (rc -108) [21039.008422] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10401:0x0]// may get corrupted (rc -108) [21039.008424] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xce9b:0x0]// may get corrupted (rc -108) [21039.008426] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10403:0x0]// may get corrupted (rc -108) [21039.008428] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf77e:0x0]// may get corrupted (rc -108) [21039.008445] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.008449] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe1e4:0x0]// may get corrupted (rc -108) [21039.008455] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe2c3:0x0]// may get corrupted (rc -108) [21039.008457] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe2c2:0x0]// may get corrupted (rc -108) [21039.008459] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe2c0:0x0]// may get corrupted (rc -108) [21039.008460] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xdc07:0x0]// may get corrupted (rc -108) [21039.008462] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe2bd:0x0]// may get corrupted (rc -108) [21039.008463] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xbe81:0x0]// may get corrupted (rc -108) [21039.008465] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe1dd:0x0]// may get corrupted (rc -108) [21039.008467] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe2bb:0x0]// may get corrupted (rc -108) [21039.008468] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10890:0x0]// may get corrupted (rc -108) [21039.008484] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe442:0x0]// may get corrupted (rc -108) [21039.008490] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xce88:0x0]// may get corrupted (rc -108) [21039.008491] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe058:0x0]// may get corrupted (rc -108) [21039.008494] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe444:0x0]// may get corrupted (rc -108) [21039.008496] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc6ea:0x0]// may get corrupted (rc -108) [21039.008497] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10cd8:0x0]// may get corrupted (rc -108) [21039.008500] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd048:0x0]// may get corrupted (rc -108) [21039.008505] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe47a:0x0]// may get corrupted (rc -108) [21039.008507] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10e53:0x0]// may get corrupted (rc -108) [21039.008508] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10cd5:0x0]// may get corrupted (rc -108) [21039.008521] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x110d8:0x0]// may get corrupted (rc -108) [21039.008526] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe445:0x0]// may get corrupted (rc -108) [21039.008529] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe479:0x0]// may get corrupted (rc -108) [21039.008531] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x110d9:0x0]// may get corrupted (rc -108) [21039.008532] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x110d6:0x0]// may get corrupted (rc -108) [21039.008534] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd09f:0x0]// may get corrupted (rc -108) [21039.008537] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x111df:0x0]// may get corrupted (rc -108) [21039.008539] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc737:0x0]// may get corrupted (rc -108) [21039.008564] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x113c2:0x0]// may get corrupted (rc -108) [21039.008566] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x113c5:0x0]// may get corrupted (rc -108) [21039.008569] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10cd4:0x0]// may get corrupted (rc -108) [21039.008570] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe677:0x0]// may get corrupted (rc -108) [21039.008571] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe696:0x0]// may get corrupted (rc -108) [21039.008572] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x113a8:0x0]// may get corrupted (rc -108) [21039.008574] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe676:0x0]// may get corrupted (rc -108) [21039.008577] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xbfe8:0x0]// may get corrupted (rc -108) [21039.008595] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd0e0:0x0]// may get corrupted (rc -108) [21039.008596] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd1dd:0x0]// may get corrupted (rc -108) [21039.008600] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x116fe:0x0]// may get corrupted (rc -108) [21039.008602] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd200:0x0]// may get corrupted (rc -108) [21039.008603] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd049:0x0]// may get corrupted (rc -108) [21039.008604] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xbeca:0x0]// may get corrupted (rc -108) [21039.008605] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x116fc:0x0]// may get corrupted (rc -108) [21039.008608] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11575:0x0]// may get corrupted (rc -108) [21039.008611] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x116ff:0x0]// may get corrupted (rc -108) [21039.008612] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x110e0:0x0]// may get corrupted (rc -108) [21039.008627] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1193e:0x0]// may get corrupted (rc -108) [21039.008643] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd2e6:0x0]// may get corrupted (rc -108) [21039.008645] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11ab5:0x0]// may get corrupted (rc -108) [21039.008646] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11b1e:0x0]// may get corrupted (rc -108) [21039.008647] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11b6c:0x0]// may get corrupted (rc -108) [21039.008649] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11b16:0x0]// may get corrupted (rc -108) [21039.008650] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd153:0x0]// may get corrupted (rc -108) [21039.008652] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc2e2:0x0]// may get corrupted (rc -108) [21039.008654] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd194:0x0]// may get corrupted (rc -108) [21039.008655] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x116fd:0x0]// may get corrupted (rc -108) [21039.008657] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe97e:0x0]// may get corrupted (rc -108) [21039.008660] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11a1e:0x0]// may get corrupted (rc -108) [21039.008678] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11b44:0x0]// may get corrupted (rc -108) [21039.008680] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11ac4:0x0]// may get corrupted (rc -108) [21039.008682] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11ef8:0x0]// may get corrupted (rc -108) [21039.008685] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11e71:0x0]// may get corrupted (rc -108) [21039.008688] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xeadc:0x0]// may get corrupted (rc -108) [21039.008689] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11ea6:0x0]// may get corrupted (rc -108) [21039.008691] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11f06:0x0]// may get corrupted (rc -108) [21039.008693] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11ecc:0x0]// may get corrupted (rc -108) [21039.008694] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11f0b:0x0]// may get corrupted (rc -108) [21039.008697] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x118ec:0x0]// may get corrupted (rc -108) [21039.008699] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11ec8:0x0]// may get corrupted (rc -108) [21039.008711] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xec57:0x0]// may get corrupted (rc -108) [21039.008716] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11f0a:0x0]// may get corrupted (rc -108) [21039.008724] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12076:0x0]// may get corrupted (rc -108) [21039.008725] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc8e3:0x0]// may get corrupted (rc -108) [21039.008727] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xeadf:0x0]// may get corrupted (rc -108) [21039.008728] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xecea:0x0]// may get corrupted (rc -108) [21039.008730] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1222a:0x0]// may get corrupted (rc -108) [21039.008732] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xec9f:0x0]// may get corrupted (rc -108) [21039.008735] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x121c3:0x0]// may get corrupted (rc -108) [21039.008737] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12085:0x0]// may get corrupted (rc -108) [21039.008739] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11f09:0x0]// may get corrupted (rc -108) [21039.008756] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd453:0x0]// may get corrupted (rc -108) [21039.008757] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.008761] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc935:0x0]// may get corrupted (rc -108) [21039.008762] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x122f4:0x0]// may get corrupted (rc -108) [21039.008764] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12478:0x0]// may get corrupted (rc -108) [21039.008766] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xbefb:0x0]// may get corrupted (rc -108) [21039.008768] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xee7e:0x0]// may get corrupted (rc -108) [21039.008769] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1247c:0x0]// may get corrupted (rc -108) [21039.008775] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12142:0x0]// may get corrupted (rc -108) [21039.008776] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12545:0x0]// may get corrupted (rc -108) [21039.008777] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd51e:0x0]// may get corrupted (rc -108) [21039.008793] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xef2e:0x0]// may get corrupted (rc -108) [21039.008796] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xedf1:0x0]// may get corrupted (rc -108) [21039.008798] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12546:0x0]// may get corrupted (rc -108) [21039.008799] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xefbc:0x0]// may get corrupted (rc -108) [21039.008801] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1281d:0x0]// may get corrupted (rc -108) [21039.008804] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xef31:0x0]// may get corrupted (rc -108) [21039.008806] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc9a9:0x0]// may get corrupted (rc -108) [21039.008807] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12122:0x0]// may get corrupted (rc -108) [21039.008809] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xefbd:0x0]// may get corrupted (rc -108) [21039.008824] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x129f7:0x0]// may get corrupted (rc -108) [21039.008828] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12a61:0x0]// may get corrupted (rc -108) [21039.008830] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12498:0x0]// may get corrupted (rc -108) [21039.008833] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1287e:0x0]// may get corrupted (rc -108) [21039.008834] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf13c:0x0]// may get corrupted (rc -108) [21039.008837] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf145:0x0]// may get corrupted (rc -108) [21039.008840] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf146:0x0]// may get corrupted (rc -108) [21039.008841] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12b81:0x0]// may get corrupted (rc -108) [21039.008843] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd5ba:0x0]// may get corrupted (rc -108) [21039.008844] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12b87:0x0]// may get corrupted (rc -108) [21039.008846] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x129fa:0x0]// may get corrupted (rc -108) [21039.008862] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xbe50:0x0]// may get corrupted (rc -108) [21039.008864] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12bbe:0x0]// may get corrupted (rc -108) [21039.008866] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd76a:0x0]// may get corrupted (rc -108) [21039.008868] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12b89:0x0]// may get corrupted (rc -108) [21039.008870] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xca57:0x0]// may get corrupted (rc -108) [21039.008871] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1306f:0x0]// may get corrupted (rc -108) [21039.008874] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13088:0x0]// may get corrupted (rc -108) [21039.008875] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13071:0x0]// may get corrupted (rc -108) [21039.008881] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.008886] Lustre: 112024:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1319d:0x0]// may get corrupted (rc -108) [21039.008896] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x131b2:0x0]// may get corrupted (rc -108) [21039.008899] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xca94:0x0]// may get corrupted (rc -108) [21039.008901] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd7ec:0x0]// may get corrupted (rc -108) [21039.008902] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd80f:0x0]// may get corrupted (rc -108) [21039.008904] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf4d7:0x0]// may get corrupted (rc -108) [21039.008906] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12bbf:0x0]// may get corrupted (rc -108) [21039.008907] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf356:0x0]// may get corrupted (rc -108) [21039.008911] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13286:0x0]// may get corrupted (rc -108) [21039.008927] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf60a:0x0]// may get corrupted (rc -108) [21039.008936] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13287:0x0]// may get corrupted (rc -108) [21039.008937] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf65a:0x0]// may get corrupted (rc -108) [21039.008938] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1306e:0x0]// may get corrupted (rc -108) [21039.008940] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf484:0x0]// may get corrupted (rc -108) [21039.008941] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13496:0x0]// may get corrupted (rc -108) [21039.008942] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13689:0x0]// may get corrupted (rc -108) [21039.008945] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf4cb:0x0]// may get corrupted (rc -108) [21039.008948] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf658:0x0]// may get corrupted (rc -108) [21039.008963] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xda26:0x0]// may get corrupted (rc -108) [21039.008967] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x135ed:0x0]// may get corrupted (rc -108) [21039.008969] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc440:0x0]// may get corrupted (rc -108) [21039.008972] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf8d5:0x0]// may get corrupted (rc -108) [21039.008975] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcb9c:0x0]// may get corrupted (rc -108) [21039.008976] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13683:0x0]// may get corrupted (rc -108) [21039.008978] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfad5:0x0]// may get corrupted (rc -108) [21039.008982] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfad4:0x0]// may get corrupted (rc -108) [21039.008997] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc1ab:0x0]// may get corrupted (rc -108) [21039.008999] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xdf5a:0x0]// may get corrupted (rc -108) [21039.009000] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.009004] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfad0:0x0]// may get corrupted (rc -108) [21039.009005] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10263:0x0]// may get corrupted (rc -108) [21039.009008] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10328:0x0]// may get corrupted (rc -108) [21039.009011] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10400:0x0]// may get corrupted (rc -108) [21039.009012] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfb72:0x0]// may get corrupted (rc -108) [21039.009013] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xce18:0x0]// may get corrupted (rc -108) [21039.009015] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe061:0x0]// may get corrupted (rc -108) [21039.009036] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf8d4:0x0]// may get corrupted (rc -108) [21039.009039] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10bbe:0x0]// may get corrupted (rc -108) [21039.009040] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10623:0x0]// may get corrupted (rc -108) [21039.009041] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x109aa:0x0]// may get corrupted (rc -108) [21039.009042] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10a1a:0x0]// may get corrupted (rc -108) [21039.009045] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x102f8:0x0]// may get corrupted (rc -108) [21039.009046] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10c73:0x0]// may get corrupted (rc -108) [21039.009047] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1099f:0x0]// may get corrupted (rc -108) [21039.009050] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10a13:0x0]// may get corrupted (rc -108) [21039.009051] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10a1c:0x0]// may get corrupted (rc -108) [21039.009069] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1100e:0x0]// may get corrupted (rc -108) [21039.009074] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10e83:0x0]// may get corrupted (rc -108) [21039.009082] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe5b0:0x0]// may get corrupted (rc -108) [21039.009083] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1121e:0x0]// may get corrupted (rc -108) [21039.009085] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1121d:0x0]// may get corrupted (rc -108) [21039.009086] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1137e:0x0]// may get corrupted (rc -108) [21039.009088] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1121a:0x0]// may get corrupted (rc -108) [21039.009089] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe622:0x0]// may get corrupted (rc -108) [21039.009090] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10e7f:0x0]// may get corrupted (rc -108) [21039.009093] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1123b:0x0]// may get corrupted (rc -108) [21039.009110] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11795:0x0]// may get corrupted (rc -108) [21039.009113] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd11c:0x0]// may get corrupted (rc -108) [21039.009115] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.009118] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x118e1:0x0]// may get corrupted (rc -108) [21039.009120] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.009123] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11792:0x0]// may get corrupted (rc -108) [21039.009124] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11382:0x0]// may get corrupted (rc -108) [21039.009125] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x118e6:0x0]// may get corrupted (rc -108) [21039.009127] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x118e4:0x0]// may get corrupted (rc -108) [21039.009128] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1123a:0x0]// may get corrupted (rc -108) [21039.009129] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.009132] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11793:0x0]// may get corrupted (rc -108) [21039.009144] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11ca5:0x0]// may get corrupted (rc -108) [21039.009150] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xeabc:0x0]// may get corrupted (rc -108) [21039.009156] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x118e8:0x0]// may get corrupted (rc -108) [21039.009158] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xec20:0x0]// may get corrupted (rc -108) [21039.009160] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x118da:0x0]// may get corrupted (rc -108) [21039.009162] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xead2:0x0]// may get corrupted (rc -108) [21039.009164] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xeba5:0x0]// may get corrupted (rc -108) [21039.009166] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x118e5:0x0]// may get corrupted (rc -108) [21039.009169] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xeabd:0x0]// may get corrupted (rc -108) [21039.009171] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11d56:0x0]// may get corrupted (rc -108) [21039.009173] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11f85:0x0]// may get corrupted (rc -108) [21039.009185] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc961:0x0]// may get corrupted (rc -108) [21039.009190] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12697:0x0]// may get corrupted (rc -108) [21039.009192] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12695:0x0]// may get corrupted (rc -108) [21039.009193] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xbdde:0x0]// may get corrupted (rc -108) [21039.009196] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xeeaf:0x0]// may get corrupted (rc -108) [21039.009198] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xec23:0x0]// may get corrupted (rc -108) [21039.009200] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x126a5:0x0]// may get corrupted (rc -108) [21039.009202] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd537:0x0]// may get corrupted (rc -108) [21039.009204] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x126a8:0x0]// may get corrupted (rc -108) [21039.009205] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1269d:0x0]// may get corrupted (rc -108) [21039.009207] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xee7f:0x0]// may get corrupted (rc -108) [21039.009229] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd3e3:0x0]// may get corrupted (rc -108) [21039.009231] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12d59:0x0]// may get corrupted (rc -108) [21039.009233] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xca07:0x0]// may get corrupted (rc -108) [21039.009235] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf34e:0x0]// may get corrupted (rc -108) [21039.009237] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf22b:0x0]// may get corrupted (rc -108) [21039.009238] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf264:0x0]// may get corrupted (rc -108) [21039.009239] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd6d3:0x0]// may get corrupted (rc -108) [21039.009241] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd747:0x0]// may get corrupted (rc -108) [21039.009244] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xbd90:0x0]// may get corrupted (rc -108) [21039.009246] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf263:0x0]// may get corrupted (rc -108) [21039.009252] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.009265] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf55b:0x0]// may get corrupted (rc -108) [21039.009267] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcabe:0x0]// may get corrupted (rc -108) [21039.009269] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x126a0:0x0]// may get corrupted (rc -108) [21039.009271] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd856:0x0]// may get corrupted (rc -108) [21039.009273] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf54d:0x0]// may get corrupted (rc -108) [21039.009290] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd9d9:0x0]// may get corrupted (rc -108) [21039.009294] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd9cb:0x0]// may get corrupted (rc -108) [21039.009298] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12d57:0x0]// may get corrupted (rc -108) [21039.009299] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf55c:0x0]// may get corrupted (rc -108) [21039.009302] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfb92:0x0]// may get corrupted (rc -108) [21039.009305] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd862:0x0]// may get corrupted (rc -108) [21039.009306] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd9f4:0x0]// may get corrupted (rc -108) [21039.009328] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10674:0x0]// may get corrupted (rc -108) [21039.009329] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc1e1:0x0]// may get corrupted (rc -108) [21039.009331] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcef6:0x0]// may get corrupted (rc -108) [21039.009332] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcb8e:0x0]// may get corrupted (rc -108) [21039.009335] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfb79:0x0]// may get corrupted (rc -108) [21039.009336] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc655:0x0]// may get corrupted (rc -108) [21039.009338] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcf1d:0x0]// may get corrupted (rc -108) [21039.009340] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcf22:0x0]// may get corrupted (rc -108) [21039.009349] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10f8a:0x0]// may get corrupted (rc -108) [21039.009363] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11501:0x0]// may get corrupted (rc -108) [21039.009364] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10fac:0x0]// may get corrupted (rc -108) [21039.009365] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x114fd:0x0]// may get corrupted (rc -108) [21039.009366] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x114fc:0x0]// may get corrupted (rc -108) [21039.009369] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11506:0x0]// may get corrupted (rc -108) [21039.009370] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.009372] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11389:0x0]// may get corrupted (rc -108) [21039.009374] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11500:0x0]// may get corrupted (rc -108) [21039.009376] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xca3c:0x0]// may get corrupted (rc -108) [21039.009378] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11503:0x0]// may get corrupted (rc -108) [21039.009379] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.009391] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11c85:0x0]// may get corrupted (rc -108) [21039.009398] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12070:0x0]// may get corrupted (rc -108) [21039.009405] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1206f:0x0]// may get corrupted (rc -108) [21039.009407] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11502:0x0]// may get corrupted (rc -108) [21039.009409] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfb74:0x0]// may get corrupted (rc -108) [21039.009411] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x122f0:0x0]// may get corrupted (rc -108) [21039.009414] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x122f8:0x0]// may get corrupted (rc -108) [21039.009415] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10faf:0x0]// may get corrupted (rc -108) [21039.009418] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xed4f:0x0]// may get corrupted (rc -108) [21039.009431] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12912:0x0]// may get corrupted (rc -108) [21039.009437] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12916:0x0]// may get corrupted (rc -108) [21039.009438] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12917:0x0]// may get corrupted (rc -108) [21039.009439] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xed55:0x0]// may get corrupted (rc -108) [21039.009443] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x129f5:0x0]// may get corrupted (rc -108) [21039.009449] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12073:0x0]// may get corrupted (rc -108) [21039.009463] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1319b:0x0]// may get corrupted (rc -108) [21039.009465] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd7b4:0x0]// may get corrupted (rc -108) [21039.009466] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12071:0x0]// may get corrupted (rc -108) [21039.009468] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12915:0x0]// may get corrupted (rc -108) [21039.009469] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x132f4:0x0]// may get corrupted (rc -108) [21039.009471] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13197:0x0]// may get corrupted (rc -108) [21039.009473] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd814:0x0]// may get corrupted (rc -108) [21039.009475] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd816:0x0]// may get corrupted (rc -108) [21039.009477] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x129f6:0x0]// may get corrupted (rc -108) [21039.009490] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10264:0x0]// may get corrupted (rc -108) [21039.009493] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10672:0x0]// may get corrupted (rc -108) [21039.009494] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.009502] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1319a:0x0]// may get corrupted (rc -108) [21039.009506] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10e97:0x0]// may get corrupted (rc -108) [21039.009508] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10e95:0x0]// may get corrupted (rc -108) [21039.009509] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10999:0x0]// may get corrupted (rc -108) [21039.009511] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10f86:0x0]// may get corrupted (rc -108) [21039.009513] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10f88:0x0]// may get corrupted (rc -108) [21039.009515] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10e93:0x0]// may get corrupted (rc -108) [21039.009517] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12074:0x0]// may get corrupted (rc -108) [21039.009519] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10673:0x0]// may get corrupted (rc -108) [21039.009520] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11507:0x0]// may get corrupted (rc -108) [21039.009534] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xed63:0x0]// may get corrupted (rc -108) [21039.009536] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12312:0x0]// may get corrupted (rc -108) [21039.009538] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12314:0x0]// may get corrupted (rc -108) [21039.009559] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1231c:0x0]// may get corrupted (rc -108) [21039.009561] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf40b:0x0]// may get corrupted (rc -108) [21039.009563] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12bc0:0x0]// may get corrupted (rc -108) [21039.009565] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1308e:0x0]// may get corrupted (rc -108) [21039.009567] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x128fa:0x0]// may get corrupted (rc -108) [21039.009568] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x10e92:0x0]// may get corrupted (rc -108) [21039.009584] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11701:0x0]// may get corrupted (rc -108) [21039.009585] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xcd90:0x0]// may get corrupted (rc -108) [21039.009587] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1156f:0x0]// may get corrupted (rc -108) [21039.009589] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x12429:0x0]// may get corrupted (rc -108) [21039.009591] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x118ea:0x0]// may get corrupted (rc -108) [21039.009594] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1270f:0x0]// may get corrupted (rc -108) [21039.009598] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xefee:0x0]// may get corrupted (rc -108) [21039.009616] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xd8ea:0x0]// may get corrupted (rc -108) [21039.009617] Lustre: 112026:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13690:0x0]// may get corrupted (rc -108) [21039.009618] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.009619] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11b73:0x0]// may get corrupted (rc -108) [21039.009621] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13494:0x0]// may get corrupted (rc -108) [21039.009625] Lustre: 112018:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf81a:0x0]// may get corrupted (rc -108) [21039.009627] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13196:0x0]// may get corrupted (rc -108) [21039.009629] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11b6d:0x0]// may get corrupted (rc -108) [21039.009630] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.009646] Lustre: 112019:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc036:0x0]// may get corrupted (rc -108) [21039.009649] Lustre: 112020:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x122f9:0x0]// may get corrupted (rc -108) [21039.009650] Lustre: 112021:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x11c37:0x0]// may get corrupted (rc -108) [21039.009652] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf819:0x0]// may get corrupted (rc -108) [21039.009655] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xef2d:0x0]// may get corrupted (rc -108) [21039.009658] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xc2e7:0x0]// may get corrupted (rc -108) [21039.009661] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf81d:0x0]// may get corrupted (rc -108) [21039.009668] Lustre: 112022:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xfb93:0x0]// may get corrupted (rc -108) [21039.009674] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xf428:0x0]// may get corrupted (rc -108) [21039.009676] Lustre: 112027:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe9e8:0x0]// may get corrupted (rc -108) [21039.009681] Lustre: 112016:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xe9e9:0x0]// may get corrupted (rc -108) [21039.009683] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x1008f:0x0]// may get corrupted (rc -108) [21039.009690] Lustre: 112025:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0xde16:0x0]// may get corrupted (rc -108) [21039.009697] Lustre: 112023:0:(llite_lib.c:2855:ll_dirty_page_discard_warn()) scratch0: dirty page discard: 10.0.10.175@o2ib10:/scratch0/fid: [0x200000404:0x13497:0x0]// may get corrupted (rc -108) [21039.009749] systemd-journald[54924]: /dev/kmsg buffer overrun, some messages lost. [21039.372187] LustreError: 118883:0:(ldlm_resource.c:1211:ldlm_resource_complain()) scratch0-MDT0000-mdc-ffff9daf09e3f800: namespace resource [0x200000404:0x103fc:0x0].0x0 (ffff9dac51d2ba40) refcount nonzero (1) after lock cleanup; forcing cleanup. [21039.426970] Lustre: scratch0-MDT0000-mdc-ffff9daf09e3f800: Connection restored to 10.0.10.175@o2ib10 (at 10.0.10.175@o2ib10) [27395.839004] Lustre: Unmounted scratch0-client [27409.000226] Lustre: Mounted scratch0-client [27447.216899] Lustre: Unmounted scratch0-client [27447.531921] Lustre: Mounted scratch0-client [29400.592149] NMI watchdog: BUG: soft lockup - CPU#4 stuck for 22s! [ldlm_bl_10:187143] [29400.593015] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg [29400.593058] i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [29400.593085] CPU: 4 PID: 187143 Comm: ldlm_bl_10 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [29400.593086] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [29400.593088] task: ffff9dbf37c1e180 ti: ffff9db52be0c000 task.ti: ffff9db52be0c000 [29400.593089] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [29400.593099] RSP: 0018:ffff9db52be0fc70 EFLAGS: 00000246 [29400.593100] RAX: 0000000000000000 RBX: ffff9da6260a6d00 RCX: 0000000000210000 [29400.593101] RDX: ffff9dbf3dedb780 RSI: 0000000000790001 RDI: ffff9daab5554478 [29400.593102] RBP: ffff9db52be0fc70 R08: ffff9daf3e71b780 R09: 0000000000000000 [29400.593103] R10: 0000000000000000 R11: 0000000000000000 R12: ffff9db52be0fc38 [29400.593104] R13: ffff9daa61a04de0 R14: ffff9daf216f0cf8 R15: ffff9daa61a04ec8 [29400.593105] FS: 0000000000000000(0000) GS:ffff9daf3e700000(0000) knlGS:0000000000000000 [29400.593106] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [29400.593108] CR2: 00007f307c7e5000 CR3: 000000153ac10000 CR4: 00000000003607e0 [29400.593109] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [29400.593110] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [29400.593111] Call Trace: [29400.593118] [] queued_spin_lock_slowpath+0xb/0xf [29400.593124] [] _raw_spin_lock+0x20/0x30 [29400.593153] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [29400.593163] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [29400.593188] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [29400.593202] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [29400.593217] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [29400.593233] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [29400.593249] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [29400.593253] [] ? wake_up_state+0x20/0x20 [29400.593268] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [29400.593271] [] kthread+0xd1/0xe0 [29400.593273] [] ? insert_kthread_work+0x40/0x40 [29400.593276] [] ret_from_fork_nospec_begin+0x7/0x21 [29400.593278] [] ? insert_kthread_work+0x40/0x40 [29400.593279] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [29400.601148] NMI watchdog: BUG: soft lockup - CPU#8 stuck for 22s! [ldlm_bl_24:191279] [29400.601867] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg [29400.601895] i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [29400.601914] CPU: 8 PID: 191279 Comm: ldlm_bl_24 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [29400.601915] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [29400.601917] task: ffff9da06890d140 ti: ffff9db130f1c000 task.ti: ffff9db130f1c000 [29400.601918] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [29400.601923] RSP: 0018:ffff9db130f1fc70 EFLAGS: 00000246 [29400.601924] RAX: 0000000000000000 RBX: ffff9da6260a7c00 RCX: 0000000000410000 [29400.601925] RDX: ffff9dbf3df1b780 RSI: 0000000000810001 RDI: ffff9daab5554478 [29400.601926] RBP: ffff9db130f1fc70 R08: ffff9daf3e81b780 R09: 0000000000000000 [29400.601927] R10: 0000000000000000 R11: 0000000000000000 R12: ffff9db130f1fc38 [29400.601928] R13: ffff9dae4d38c2c0 R14: ffff9da868aac6c8 R15: ffff9dae4d38c3a8 [29400.601929] FS: 0000000000000000(0000) GS:ffff9daf3e800000(0000) knlGS:0000000000000000 [29400.601930] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [29400.601931] CR2: 00007fc3d8d40000 CR3: 000000153ac10000 CR4: 00000000003607e0 [29400.601933] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [29400.601934] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [29400.601934] Call Trace: [29400.601938] [] queued_spin_lock_slowpath+0xb/0xf [29400.601941] [] _raw_spin_lock+0x20/0x30 [29400.601961] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [29400.601969] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [29400.601987] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [29400.602000] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [29400.602015] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [29400.602029] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [29400.602043] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [29400.602046] [] ? wake_up_state+0x20/0x20 [29400.602059] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [29400.602062] [] kthread+0xd1/0xe0 [29400.602063] [] ? insert_kthread_work+0x40/0x40 [29400.602066] [] ret_from_fork_nospec_begin+0x7/0x21 [29400.602068] [] ? insert_kthread_work+0x40/0x40 [29400.602069] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [29400.689151] NMI watchdog: BUG: soft lockup - CPU#19 stuck for 22s! [ldlm_bl_13:187146] [29400.690062] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg [29400.690105] i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [29400.690132] CPU: 19 PID: 187146 Comm: ldlm_bl_13 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [29400.690134] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [29400.690135] task: ffff9dbf386ee180 ti: ffff9db9236e0000 task.ti: ffff9db9236e0000 [29400.690137] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [29400.690145] RSP: 0018:ffff9db9236e3c70 EFLAGS: 00000246 [29400.690146] RAX: 0000000000000000 RBX: ffff9dabbe0f0780 RCX: 0000000000990000 [29400.690147] RDX: ffff9dbf3de1b780 RSI: 0000000000610001 RDI: ffff9daab5554478 [29400.690148] RBP: ffff9db9236e3c70 R08: ffff9dbf3dfdb780 R09: 0000000000000000 [29400.690149] R10: 0000000000000000 R11: 0000000000000000 R12: ffff9db9236e3c38 [29400.690150] R13: ffff9db69df34de0 R14: ffff9dbdb2af14d8 R15: ffff9db69df34ec8 [29400.690151] FS: 0000000000000000(0000) GS:ffff9dbf3dfc0000(0000) knlGS:0000000000000000 [29400.690152] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [29400.690153] CR2: 00007f9ccc7f2000 CR3: 000000153ac10000 CR4: 00000000003607e0 [29400.690155] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [29400.690156] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [29400.690157] Call Trace: [29400.690162] [] queued_spin_lock_slowpath+0xb/0xf [29400.690167] [] _raw_spin_lock+0x20/0x30 [29400.690199] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [29400.690210] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [29400.690239] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [29400.690253] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [29400.690269] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [29400.690283] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [29400.690298] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [29400.690301] [] ? wake_up_state+0x20/0x20 [29400.690315] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [29400.690317] [] kthread+0xd1/0xe0 [29400.690319] [] ? insert_kthread_work+0x40/0x40 [29400.690322] [] ret_from_fork_nospec_begin+0x7/0x21 [29400.690324] [] ? insert_kthread_work+0x40/0x40 [29400.690325] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [29428.592317] NMI watchdog: BUG: soft lockup - CPU#4 stuck for 22s! [ldlm_bl_10:187143] [29428.593189] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg [29428.593230] i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [29428.593256] CPU: 4 PID: 187143 Comm: ldlm_bl_10 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [29428.593258] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [29428.593260] task: ffff9dbf37c1e180 ti: ffff9db52be0c000 task.ti: ffff9db52be0c000 [29428.593261] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [29428.593271] RSP: 0018:ffff9db52be0fc70 EFLAGS: 00000246 [29428.593272] RAX: 0000000000000000 RBX: ffff9dab7cec9540 RCX: 0000000000210000 [29428.593273] RDX: ffff9dbf3de1b780 RSI: 0000000000610001 RDI: ffff9daab5554478 [29428.593274] RBP: ffff9db52be0fc70 R08: ffff9daf3e71b780 R09: 0000000000000000 [29428.593275] R10: 0000000000000000 R11: 0000000000000000 R12: ffff9db52be0fc38 [29428.593276] R13: ffff9daa61a04de0 R14: ffff9daf216f0cf8 R15: ffff9daa61a04ec8 [29428.593277] FS: 0000000000000000(0000) GS:ffff9daf3e700000(0000) knlGS:0000000000000000 [29428.593278] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [29428.593279] CR2: 00007f307c7e5000 CR3: 000000153ac10000 CR4: 00000000003607e0 [29428.593281] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [29428.593281] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [29428.593283] Call Trace: [29428.593290] [] queued_spin_lock_slowpath+0xb/0xf [29428.593295] [] _raw_spin_lock+0x20/0x30 [29428.593326] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [29428.593337] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [29428.593364] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [29428.593378] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [29428.593393] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [29428.593407] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [29428.593422] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [29428.593425] [] ? wake_up_state+0x20/0x20 [29428.593439] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [29428.593444] [] kthread+0xd1/0xe0 [29428.593445] [] ? insert_kthread_work+0x40/0x40 [29428.593448] [] ret_from_fork_nospec_begin+0x7/0x21 [29428.593450] [] ? insert_kthread_work+0x40/0x40 [29428.593451] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [29428.601315] NMI watchdog: BUG: soft lockup - CPU#8 stuck for 22s! [ldlm_bl_24:191279] [29428.602037] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg [29428.602065] i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [29428.602084] CPU: 8 PID: 191279 Comm: ldlm_bl_24 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [29428.602085] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [29428.602087] task: ffff9da06890d140 ti: ffff9db130f1c000 task.ti: ffff9db130f1c000 [29428.602088] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [29428.602092] RSP: 0018:ffff9db130f1fc70 EFLAGS: 00000246 [29428.602093] RAX: 0000000000000000 RBX: ffff9dabbe0f3ac0 RCX: 0000000000410000 [29428.602094] RDX: ffff9daf3e75b780 RSI: 0000000000290001 RDI: ffff9daab5554478 [29428.602095] RBP: ffff9db130f1fc70 R08: ffff9daf3e81b780 R09: 0000000000000000 [29428.602097] R10: 0000000000000000 R11: 0000000000000000 R12: ffff9db130f1fc38 [29428.602097] R13: ffff9dae4d38c2c0 R14: ffff9da868aac6c8 R15: ffff9dae4d38c3a8 [29428.602099] FS: 0000000000000000(0000) GS:ffff9daf3e800000(0000) knlGS:0000000000000000 [29428.602100] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [29428.602101] CR2: 00007fc3d8d40000 CR3: 000000153ac10000 CR4: 00000000003607e0 [29428.602102] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [29428.602103] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [29428.602104] Call Trace: [29428.602107] [] queued_spin_lock_slowpath+0xb/0xf [29428.602110] [] _raw_spin_lock+0x20/0x30 [29428.602128] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [29428.602136] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [29428.602151] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [29428.602165] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [29428.602179] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [29428.602193] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [29428.602207] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [29428.602210] [] ? wake_up_state+0x20/0x20 [29428.602224] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [29428.602226] [] kthread+0xd1/0xe0 [29428.602228] [] ? insert_kthread_work+0x40/0x40 [29428.602230] [] ret_from_fork_nospec_begin+0x7/0x21 [29428.602232] [] ? insert_kthread_work+0x40/0x40 [29428.602233] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [29432.674341] NMI watchdog: BUG: soft lockup - CPU#13 stuck for 23s! [ldlm_bl_22:190993] [29432.675415] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg [29432.675458] i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [29432.675486] CPU: 13 PID: 190993 Comm: ldlm_bl_22 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [29432.675487] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [29432.675489] task: ffff9dab5cc36180 ti: ffff9daf3d964000 task.ti: ffff9daf3d964000 [29432.675491] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [29432.675500] RSP: 0018:ffff9daf3d967c70 EFLAGS: 00000246 [29432.675501] RAX: 0000000000000000 RBX: ffff9dab7ceced00 RCX: 0000000000690000 [29432.675502] RDX: ffff9daf3e7db780 RSI: 0000000000390001 RDI: ffff9daab5554478 [29432.675503] RBP: ffff9daf3d967c70 R08: ffff9dbf3de5b780 R09: 0000000000000000 [29432.675504] R10: 0000000000000000 R11: fffff9031ef5f680 R12: ffff9daf3d967c38 [29432.675505] R13: ffff9dae4d388b20 R14: ffff9da868aad178 R15: ffff9dae4d388c08 [29432.675507] FS: 0000000000000000(0000) GS:ffff9dbf3de40000(0000) knlGS:0000000000000000 [29432.675508] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [29432.675509] CR2: 00007f3d5f3923e0 CR3: 000000153ac10000 CR4: 00000000003607e0 [29432.675511] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [29432.675512] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [29432.675513] Call Trace: [29432.675518] [] queued_spin_lock_slowpath+0xb/0xf [29432.675523] [] _raw_spin_lock+0x20/0x30 [29432.675559] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [29432.675572] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [29432.675603] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [29432.675617] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [29432.675634] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [29432.675649] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [29432.675665] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [29432.675668] [] ? wake_up_state+0x20/0x20 [29432.675684] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [29432.675688] [] kthread+0xd1/0xe0 [29432.675690] [] ? insert_kthread_work+0x40/0x40 [29432.675693] [] ret_from_fork_nospec_begin+0x7/0x21 [29432.675695] [] ? insert_kthread_work+0x40/0x40 [29432.675696] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [29435.787360] INFO: rcu_sched self-detected stall on CPU { 8} (t=60000 jiffies g=1060068 c=1060067 q=592216) [29435.788274] Task dump for CPU 8: [29435.788276] ldlm_bl_24 R running task 0 191279 2 0x00000088 [29435.788279] Call Trace: [29435.788281] [] sched_show_task+0xa8/0x110 [29435.788292] [] dump_cpu_task+0x39/0x70 [29435.788296] [] rcu_dump_cpu_stacks+0x90/0xd0 [29435.788298] [] rcu_check_callbacks+0x442/0x730 [29435.788303] [] ? tick_sched_do_timer+0x50/0x50 [29435.788308] [] update_process_times+0x46/0x80 [29435.788310] [] tick_sched_handle+0x30/0x70 [29435.788311] [] tick_sched_timer+0x39/0x80 [29435.788317] [] __hrtimer_run_queues+0xf3/0x270 [29435.788319] [] hrtimer_interrupt+0xaf/0x1d0 [29435.788325] [] local_apic_timer_interrupt+0x3b/0x60 [29435.788330] [] smp_apic_timer_interrupt+0x43/0x60 [29435.788334] [] apic_timer_interrupt+0x162/0x170 [29435.788335] [] ? ldlm_kms_shift_cb+0x7b/0x220 [ptlrpc] [29435.788381] [] ? ldlm_extent_shift_kms+0x1b0/0x1b0 [ptlrpc] [29435.788398] [] interval_iterate_reverse+0x53/0x270 [ptlrpc] [29435.788411] [] ldlm_extent_shift_kms+0xa2/0x1b0 [ptlrpc] [29435.788414] [] ? remove_waiter+0x66/0x126 [29435.788424] [] osc_ldlm_blocking_ast+0x306/0x3a0 [osc] [29435.788437] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [29435.788462] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [29435.788476] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [29435.788489] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [29435.788503] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [29435.788506] [] ? wake_up_state+0x20/0x20 [29435.788520] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [29435.788522] [] kthread+0xd1/0xe0 [29435.788524] [] ? insert_kthread_work+0x40/0x40 [29435.788526] [] ret_from_fork_nospec_begin+0x7/0x21 [29435.788528] [] ? insert_kthread_work+0x40/0x40 [29460.674515] NMI watchdog: BUG: soft lockup - CPU#13 stuck for 22s! [ldlm_bl_22:190993] [29460.675496] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg [29460.675538] i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [29460.675564] CPU: 13 PID: 190993 Comm: ldlm_bl_22 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [29460.675566] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [29460.675567] task: ffff9dab5cc36180 ti: ffff9daf3d964000 task.ti: ffff9daf3d964000 [29460.675569] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [29460.675577] RSP: 0018:ffff9daf3d967c70 EFLAGS: 00000246 [29460.675578] RAX: 0000000000000000 RBX: ffff9da6260a6a80 RCX: 0000000000690000 [29460.675579] RDX: ffff9dbf3de1b780 RSI: 0000000000610001 RDI: ffff9daab5554478 [29460.675580] RBP: ffff9daf3d967c70 R08: ffff9dbf3de5b780 R09: 0000000000000000 [29460.675581] R10: 0000000000000000 R11: fffff90338589100 R12: ffff9daf3d967c38 [29460.675582] R13: ffff9dae4d388b20 R14: ffff9da868aad178 R15: ffff9dae4d388c08 [29460.675584] FS: 0000000000000000(0000) GS:ffff9dbf3de40000(0000) knlGS:0000000000000000 [29460.675585] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [29460.675586] CR2: 00007f3d5f3923e0 CR3: 000000153ac10000 CR4: 00000000003607e0 [29460.675587] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [29460.675588] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [29460.675589] Call Trace: [29460.675595] [] queued_spin_lock_slowpath+0xb/0xf [29460.675599] [] _raw_spin_lock+0x20/0x30 [29460.675635] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [29460.675647] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [29460.675678] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [29460.675691] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [29460.675707] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [29460.675722] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [29460.675737] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [29460.675740] [] ? wake_up_state+0x20/0x20 [29460.675754] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [29460.675756] [] kthread+0xd1/0xe0 [29460.675758] [] ? insert_kthread_work+0x40/0x40 [29460.675761] [] ret_from_fork_nospec_begin+0x7/0x21 [29460.675763] [] ? insert_kthread_work+0x40/0x40 [29460.675764] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [29476.672615] NMI watchdog: BUG: soft lockup - CPU#12 stuck for 23s! [ldlm_bl_20:187153] [29476.673738] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg [29476.673782] i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [29476.673810] CPU: 12 PID: 187153 Comm: ldlm_bl_20 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [29476.673811] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [29476.673813] task: ffff9dbf2c38b0c0 ti: ffff9dba18704000 task.ti: ffff9dba18704000 [29476.673815] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [29476.673823] RSP: 0018:ffff9dba18707c70 EFLAGS: 00000246 [29476.673824] RAX: 0000000000000000 RBX: ffff9dabbe0f3e80 RCX: 0000000000610000 [29476.673825] RDX: ffff9daf3e61b780 RSI: 0000000000010001 RDI: ffff9daab5554478 [29476.673826] RBP: ffff9dba18707c70 R08: ffff9dbf3de1b780 R09: 0000000000000000 [29476.673827] R10: 0000000000000000 R11: fffff902cc909280 R12: ffff9dba18707c38 [29476.673829] R13: ffff9dae4d38cde0 R14: ffff9dacfdf603f8 R15: ffff9dae4d38cec8 [29476.673830] FS: 0000000000000000(0000) GS:ffff9dbf3de00000(0000) knlGS:0000000000000000 [29476.673831] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [29476.673833] CR2: 00007fc89e18c000 CR3: 0000001e0b6ca000 CR4: 00000000003607e0 [29476.673834] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [29476.673835] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [29476.673836] Call Trace: [29476.673842] [] queued_spin_lock_slowpath+0xb/0xf [29476.673847] [] _raw_spin_lock+0x20/0x30 [29476.673883] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [29476.673895] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [29476.673922] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [29476.673937] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [29476.673953] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [29476.673968] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [29476.673984] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [29476.673988] [] ? wake_up_state+0x20/0x20 [29476.674003] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [29476.674006] [] kthread+0xd1/0xe0 [29476.674008] [] ? insert_kthread_work+0x40/0x40 [29476.674011] [] ret_from_fork_nospec_begin+0x7/0x21 [29476.674013] [] ? insert_kthread_work+0x40/0x40 [29476.674014] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [29488.674690] NMI watchdog: BUG: soft lockup - CPU#13 stuck for 22s! [ldlm_bl_22:190993] [29488.675824] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg [29488.675868] i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [29488.675895] CPU: 13 PID: 190993 Comm: ldlm_bl_22 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [29488.675897] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [29488.675898] task: ffff9dab5cc36180 ti: ffff9daf3d964000 task.ti: ffff9daf3d964000 [29488.675900] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x15b/0x200 [29488.675907] RSP: 0018:ffff9daf3d967c70 EFLAGS: 00000202 [29488.675909] RAX: 0000000000000001 RBX: ffff9dabbe0f4780 RCX: 0000000000690000 [29488.675910] RDX: 0000000000610001 RSI: 0000000000790001 RDI: ffff9daab5554478 [29488.675911] RBP: ffff9daf3d967c70 R08: ffff9dbf3de5b780 R09: ffff9daf3e79b780 [29488.675912] R10: 0000000000000000 R11: fffff902f0fd4000 R12: ffff9daf3d967c38 [29488.675913] R13: ffff9dae4d388b20 R14: ffff9da868aad178 R15: ffff9dae4d388c08 [29488.675915] FS: 0000000000000000(0000) GS:ffff9dbf3de40000(0000) knlGS:0000000000000000 [29488.675916] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [29488.675917] CR2: 00007f3d5f3923e0 CR3: 000000153ac10000 CR4: 00000000003607e0 [29488.675919] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [29488.675920] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [29488.675921] Call Trace: [29488.675926] [] queued_spin_lock_slowpath+0xb/0xf [29488.675931] [] _raw_spin_lock+0x20/0x30 [29488.675967] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [29488.675979] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [29488.676009] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [29488.676024] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [29488.676040] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [29488.676056] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [29488.676072] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [29488.676075] [] ? wake_up_state+0x20/0x20 [29488.676090] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [29488.676094] [] kthread+0xd1/0xe0 [29488.676096] [] ? insert_kthread_work+0x40/0x40 [29488.676099] [] ret_from_fork_nospec_begin+0x7/0x21 [29488.676101] [] ? insert_kthread_work+0x40/0x40 [29488.676102] Code: 74 04 41 0f 18 09 8b 17 0f b7 c2 85 c0 74 21 83 f8 03 75 10 eb 1a 66 2e 0f 1f 84 00 00 00 00 00 85 c0 74 0c f3 90 8b 17 0f b7 c2 <83> f8 03 75 f0 be 01 00 00 00 eb 15 66 0f 1f 84 00 00 00 00 00 [29498.687751] INFO: rcu_sched self-detected stall on CPU { 13} (t=60001 jiffies g=1060069 c=1060068 q=903783) [29498.688764] INFO: rcu_sched detected stalls on CPUs/tasks: { 13} (detected by 8, t=60002 jiffies, g=1060069, c=1060068, q=903793) [29498.688765] Task dump for CPU 13: [29498.688768] ldlm_bl_22 R running task 0 190993 2 0x00000088 [29498.688769] Call Trace: [29498.688778] [] ? __slab_free+0x81/0x2f0 [29498.688785] [] ? radix_tree_next_chunk+0x116/0x2d0 [29498.688787] [] ? radix_tree_gang_lookup+0xcd/0x150 [29498.688811] [] ? cl2vvp_io+0x1d/0x90 [lustre] [29498.688821] [] ? vvp_io_fini+0x34/0x6b0 [lustre] [29498.688851] [] ? cl_io_fini+0x78/0x250 [obdclass] [29498.688856] [] ? native_queued_spin_lock_slowpath+0x126/0x200 [29498.688859] [] ? queued_spin_lock_slowpath+0xb/0xf [29498.688862] [] ? _raw_spin_lock+0x20/0x30 [29498.688876] [] ? cl_object_attr_lock+0x1a/0x20 [obdclass] [29498.688885] [] ? osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [29498.688910] [] ? ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [29498.688924] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [29498.688939] [] ? ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [29498.688956] [] ? ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [29498.688971] [] ? ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [29498.688975] [] ? wake_up_state+0x20/0x20 [29498.688990] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [29498.688993] [] ? kthread+0xd1/0xe0 [29498.688994] [] ? insert_kthread_work+0x40/0x40 [29498.688997] [] ? ret_from_fork_nospec_begin+0x7/0x21 [29498.688998] [] ? insert_kthread_work+0x40/0x40 [29498.690913] Task dump for CPU 13: [29498.690915] ldlm_bl_22 R running task 0 190993 2 0x00000088 [29498.690917] Call Trace: [29498.690919] [] sched_show_task+0xa8/0x110 [29498.690928] [] dump_cpu_task+0x39/0x70 [29498.690932] [] rcu_dump_cpu_stacks+0x90/0xd0 [29498.690934] [] rcu_check_callbacks+0x442/0x730 [29498.690938] [] ? tick_sched_do_timer+0x50/0x50 [29498.690942] [] update_process_times+0x46/0x80 [29498.690944] [] tick_sched_handle+0x30/0x70 [29498.690946] [] tick_sched_timer+0x39/0x80 [29498.690951] [] __hrtimer_run_queues+0xf3/0x270 [29498.690953] [] hrtimer_interrupt+0xaf/0x1d0 [29498.690958] [] local_apic_timer_interrupt+0x3b/0x60 [29498.690962] [] smp_apic_timer_interrupt+0x43/0x60 [29498.690966] [] apic_timer_interrupt+0x162/0x170 [29498.690967] [] ? native_queued_spin_lock_slowpath+0x126/0x200 [29498.690974] [] queued_spin_lock_slowpath+0xb/0xf [29498.690977] [] _raw_spin_lock+0x20/0x30 [29498.691012] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [29498.691023] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [29498.691054] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [29498.691068] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [29498.691083] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [29498.691099] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [29498.691114] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [29498.691117] [] ? wake_up_state+0x20/0x20 [29498.691132] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [29498.691134] [] kthread+0xd1/0xe0 [29498.691136] [] ? insert_kthread_work+0x40/0x40 [29498.691139] [] ret_from_fork_nospec_begin+0x7/0x21 [29498.691141] [] ? insert_kthread_work+0x40/0x40 [29504.672790] NMI watchdog: BUG: soft lockup - CPU#12 stuck for 22s! [ldlm_bl_20:187153] [29504.673988] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg [29504.674031] i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [29504.674059] CPU: 12 PID: 187153 Comm: ldlm_bl_20 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [29504.674060] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [29504.674062] task: ffff9dbf2c38b0c0 ti: ffff9dba18704000 task.ti: ffff9dba18704000 [29504.674063] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [29504.674071] RSP: 0018:ffff9dba18707c70 EFLAGS: 00000246 [29504.674072] RAX: 0000000000000000 RBX: ffff9da6260a75c0 RCX: 0000000000610000 [29504.674073] RDX: ffff9daf3e65b780 RSI: 0000000000090001 RDI: ffff9daab5554478 [29504.674075] RBP: ffff9dba18707c70 R08: ffff9dbf3de1b780 R09: 0000000000000000 [29504.674076] R10: 0000000000000000 R11: fffff902f3871a80 R12: ffff9dba18707c38 [29504.674077] R13: ffff9dae4d38cde0 R14: ffff9dacfdf603f8 R15: ffff9dae4d38cec8 [29504.674078] FS: 0000000000000000(0000) GS:ffff9dbf3de00000(0000) knlGS:0000000000000000 [29504.674079] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [29504.674081] CR2: 00007fc89e18c000 CR3: 0000001e0b6ca000 CR4: 00000000003607e0 [29504.674082] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [29504.674083] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [29504.674084] Call Trace: [29504.674090] [] queued_spin_lock_slowpath+0xb/0xf [29504.674095] [] _raw_spin_lock+0x20/0x30 [29504.674131] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [29504.674143] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [29504.674173] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [29504.674187] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [29504.674203] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [29504.674219] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [29504.674234] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [29504.674238] [] ? wake_up_state+0x20/0x20 [29504.674253] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [29504.674256] [] kthread+0xd1/0xe0 [29504.674258] [] ? insert_kthread_work+0x40/0x40 [29504.674261] [] ret_from_fork_nospec_begin+0x7/0x21 [29504.674263] [] ? insert_kthread_work+0x40/0x40 [29504.674264] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [29521.053031] INFO: task systemd:1 blocked for more than 120 seconds. [29521.054065] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [29521.054914] systemd D ffff9da069538000 0 1 0 0x00000000 [29521.054917] Call Trace: [29521.054925] [] ? security_inode_permission+0x22/0x30 [29521.054927] [] ? __inode_permission+0x52/0xd0 [29521.054932] [] schedule+0x29/0x70 [29521.054934] [] schedule_timeout+0x221/0x2d0 [29521.054938] [] ? mntput+0x24/0x40 [29521.054940] [] wait_for_completion+0xfd/0x140 [29521.054944] [] ? wake_up_state+0x20/0x20 [29521.054947] [] ? __call_rcu+0x2c0/0x2c0 [29521.054951] [] wait_rcu_gp+0x5e/0x80 [29521.054953] [] ? ftrace_raw_output_rcu_utilization+0x70/0x70 [29521.054955] [] synchronize_sched+0x3b/0x50 [29521.054958] [] mem_cgroup_css_alloc+0xc1/0x300 [29521.054962] [] cgroup_mkdir+0x264/0x560 [29521.054964] [] vfs_mkdir+0xbd/0x170 [29521.054966] [] SyS_mkdirat+0xca/0x100 [29521.054967] [] SyS_mkdir+0x19/0x20 [29521.054971] [] system_call_fastpath+0x22/0x27 [29524.674915] NMI watchdog: BUG: soft lockup - CPU#13 stuck for 22s! [ldlm_bl_22:190993] [29524.675968] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg [29524.676009] i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [29524.676036] CPU: 13 PID: 190993 Comm: ldlm_bl_22 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [29524.676037] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [29524.676039] task: ffff9dab5cc36180 ti: ffff9daf3d964000 task.ti: ffff9daf3d964000 [29524.676040] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [29524.676048] RSP: 0018:ffff9daf3d967c70 EFLAGS: 00000246 [29524.676049] RAX: 0000000000000000 RBX: ffff9da6260a7480 RCX: 0000000000690000 [29524.676050] RDX: ffff9dbf3df5b780 RSI: 0000000000890001 RDI: ffff9daab5554478 [29524.676051] RBP: ffff9daf3d967c70 R08: ffff9dbf3de5b780 R09: 0000000000000000 [29524.676052] R10: 0000000000000000 R11: fffff902fe28b300 R12: ffff9daf3d967c38 [29524.676053] R13: ffff9dae4d388b20 R14: ffff9da868aad178 R15: ffff9dae4d388c08 [29524.676054] FS: 0000000000000000(0000) GS:ffff9dbf3de40000(0000) knlGS:0000000000000000 [29524.676056] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [29524.676057] CR2: 00007f3d5f3923e0 CR3: 000000153ac10000 CR4: 00000000003607e0 [29524.676058] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [29524.676059] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [29524.676060] Call Trace: [29524.676065] [] queued_spin_lock_slowpath+0xb/0xf [29524.676069] [] _raw_spin_lock+0x20/0x30 [29524.676104] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [29524.676116] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [29524.676148] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [29524.676162] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [29524.676179] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [29524.676194] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [29524.676210] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [29524.676214] [] ? wake_up_state+0x20/0x20 [29524.676229] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [29524.676232] [] kthread+0xd1/0xe0 [29524.676234] [] ? insert_kthread_work+0x40/0x40 [29524.676237] [] ret_from_fork_nospec_begin+0x7/0x21 [29524.676239] [] ? insert_kthread_work+0x40/0x40 [29524.676240] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [29532.672965] NMI watchdog: BUG: soft lockup - CPU#12 stuck for 22s! [ldlm_bl_20:187153] [29532.674140] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg [29532.674181] i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [29532.674208] CPU: 12 PID: 187153 Comm: ldlm_bl_20 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [29532.674209] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [29532.674211] task: ffff9dbf2c38b0c0 ti: ffff9dba18704000 task.ti: ffff9dba18704000 [29532.674212] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [29532.674220] RSP: 0018:ffff9dba18707c70 EFLAGS: 00000246 [29532.674221] RAX: 0000000000000000 RBX: ffff9dab7cece440 RCX: 0000000000610000 [29532.674222] RDX: ffff9daf3e75b780 RSI: 0000000000290001 RDI: ffff9daab5554478 [29532.674223] RBP: ffff9dba18707c70 R08: ffff9dbf3de1b780 R09: 0000000000000000 [29532.674224] R10: 0000000000000000 R11: fffff902ee2b1d80 R12: ffff9dba18707c38 [29532.674225] R13: ffff9dae4d38cde0 R14: ffff9dacfdf603f8 R15: ffff9dae4d38cec8 [29532.674226] FS: 0000000000000000(0000) GS:ffff9dbf3de00000(0000) knlGS:0000000000000000 [29532.674227] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [29532.674228] CR2: 00007fc89e18c000 CR3: 0000001e0b6ca000 CR4: 00000000003607e0 [29532.674230] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [29532.674230] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [29532.674231] Call Trace: [29532.674237] [] queued_spin_lock_slowpath+0xb/0xf [29532.674241] [] _raw_spin_lock+0x20/0x30 [29532.674277] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [29532.674289] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [29532.674320] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [29532.674334] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [29532.674350] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [29532.674366] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [29532.674382] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [29532.674385] [] ? wake_up_state+0x20/0x20 [29532.674400] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [29532.674404] [] kthread+0xd1/0xe0 [29532.674406] [] ? insert_kthread_work+0x40/0x40 [29532.674409] [] ret_from_fork_nospec_begin+0x7/0x21 [29532.674411] [] ? insert_kthread_work+0x40/0x40 [29532.674412] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [29548.607065] NMI watchdog: BUG: soft lockup - CPU#10 stuck for 23s! [ldlm_bl_27:191369] [29548.608048] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg [29548.608105] i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [29548.608132] CPU: 10 PID: 191369 Comm: ldlm_bl_27 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [29548.608133] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [29548.608135] task: ffff9daf3da7d140 ti: ffff9dbb07150000 task.ti: ffff9dbb07150000 [29548.608136] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [29548.608146] RSP: 0018:ffff9dbb07153c70 EFLAGS: 00000246 [29548.608148] RAX: 0000000000000000 RBX: ffff9da6260a6580 RCX: 0000000000510000 [29548.608149] RDX: ffff9dbf3e09b780 RSI: 0000000000b10001 RDI: ffff9daab5554478 [29548.608150] RBP: ffff9dbb07153c70 R08: ffff9daf3e89b780 R09: 0000000000000000 [29548.608151] R10: 0000000000000000 R11: fffff902ecf89280 R12: ffff9dbb07153c38 [29548.608152] R13: ffff9dac56716420 R14: ffff9dacfdf61dd8 R15: ffff9dac56716508 [29548.608153] FS: 0000000000000000(0000) GS:ffff9daf3e880000(0000) knlGS:0000000000000000 [29548.608155] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [29548.608156] CR2: 00007ffb96da2000 CR3: 000000153ac10000 CR4: 00000000003607e0 [29548.608157] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [29548.608158] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [29548.608159] Call Trace: [29548.608166] [] queued_spin_lock_slowpath+0xb/0xf [29548.608172] [] _raw_spin_lock+0x20/0x30 [29548.608203] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [29548.608213] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [29548.608240] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [29548.608254] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [29548.608269] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [29548.608284] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [29548.608299] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [29548.608302] [] ? wake_up_state+0x20/0x20 [29548.608316] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [29548.608319] [] kthread+0xd1/0xe0 [29548.608321] [] ? insert_kthread_work+0x40/0x40 [29548.608324] [] ret_from_fork_nospec_begin+0x7/0x21 [29548.608326] [] ? insert_kthread_work+0x40/0x40 [29548.608327] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [29552.675091] NMI watchdog: BUG: soft lockup - CPU#13 stuck for 23s! [ldlm_bl_22:190993] [29552.676156] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg [29552.676198] i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [29552.676225] CPU: 13 PID: 190993 Comm: ldlm_bl_22 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [29552.676227] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [29552.676228] task: ffff9dab5cc36180 ti: ffff9daf3d964000 task.ti: ffff9daf3d964000 [29552.676230] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [29552.676237] RSP: 0018:ffff9daf3d967c70 EFLAGS: 00000246 [29552.676238] RAX: 0000000000000000 RBX: ffff9dabbe0f0b40 RCX: 0000000000690000 [29552.676239] RDX: ffff9daf3e89b780 RSI: 0000000000510001 RDI: ffff9daab5554478 [29552.676240] RBP: ffff9daf3d967c70 R08: ffff9dbf3de5b780 R09: 0000000000000000 [29552.676242] R10: 0000000000000000 R11: fffff903403acd00 R12: ffff9daf3d967c38 [29552.676243] R13: ffff9dae4d388b20 R14: ffff9da868aad178 R15: ffff9dae4d388c08 [29552.676244] FS: 0000000000000000(0000) GS:ffff9dbf3de40000(0000) knlGS:0000000000000000 [29552.676245] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [29552.676246] CR2: 00007f3d5f3923e0 CR3: 000000153ac10000 CR4: 00000000003607e0 [29552.676248] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [29552.676249] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [29552.676249] Call Trace: [29552.676255] [] queued_spin_lock_slowpath+0xb/0xf [29552.676259] [] _raw_spin_lock+0x20/0x30 [29552.676295] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [29552.676306] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [29552.676337] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [29552.676351] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [29552.676366] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [29552.676381] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [29552.676396] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [29552.676399] [] ? wake_up_state+0x20/0x20 [29552.676414] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [29552.676417] [] kthread+0xd1/0xe0 [29552.676419] [] ? insert_kthread_work+0x40/0x40 [29552.676422] [] ret_from_fork_nospec_begin+0x7/0x21 [29552.676423] [] ? insert_kthread_work+0x40/0x40 [29552.676424] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [29552.678088] NMI watchdog: BUG: soft lockup - CPU#14 stuck for 23s! [ldlm_bl_14:187147] [29552.678949] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg [29552.678978] i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [29552.678997] CPU: 14 PID: 187147 Comm: ldlm_bl_14 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [29552.678999] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [29552.679001] task: ffff9daf3959a080 ti: ffff9db944ad0000 task.ti: ffff9db944ad0000 [29552.679002] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [29552.679008] RSP: 0018:ffff9db944ad3c70 EFLAGS: 00000246 [29552.679009] RAX: 0000000000000000 RBX: ffff9da6260a7c00 RCX: 0000000000710000 [29552.679010] RDX: ffff9dbf3de1b780 RSI: 0000000000610001 RDI: ffff9daab5554478 [29552.679011] RBP: ffff9db944ad3c70 R08: ffff9dbf3de9b780 R09: 0000000000000000 [29552.679012] R10: 0000000000000000 R11: fffff902e4a7bc80 R12: ffff9db944ad3c38 [29552.679013] R13: ffff9dabd15242c0 R14: ffff9da868aad0e8 R15: ffff9dabd15243a8 [29552.679014] FS: 0000000000000000(0000) GS:ffff9dbf3de80000(0000) knlGS:0000000000000000 [29552.679015] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [29552.679016] CR2: 00007f3d3e8ac000 CR3: 0000001e0b6ca000 CR4: 00000000003607e0 [29552.679018] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [29552.679018] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [29552.679019] Call Trace: [29552.679023] [] queued_spin_lock_slowpath+0xb/0xf [29552.679026] [] _raw_spin_lock+0x20/0x30 [29552.679044] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [29552.679053] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [29552.679067] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [29552.679080] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [29552.679094] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [29552.679108] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [29552.679123] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [29552.679126] [] ? wake_up_state+0x20/0x20 [29552.679140] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [29552.679142] [] kthread+0xd1/0xe0 [29552.679143] [] ? insert_kthread_work+0x40/0x40 [29552.679146] [] ret_from_fork_nospec_begin+0x7/0x21 [29552.679148] [] ? insert_kthread_work+0x40/0x40 [29552.679148] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [29560.673139] NMI watchdog: BUG: soft lockup - CPU#12 stuck for 22s! [ldlm_bl_20:187153] [29560.674189] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg [29560.674231] i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [29560.674258] CPU: 12 PID: 187153 Comm: ldlm_bl_20 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [29560.674260] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [29560.674262] task: ffff9dbf2c38b0c0 ti: ffff9dba18704000 task.ti: ffff9dba18704000 [29560.674263] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x158/0x200 [29560.674270] RSP: 0018:ffff9dba18707c70 EFLAGS: 00000202 [29560.674271] RAX: 0000000000000001 RBX: ffff9dab7cecf840 RCX: 0000000000610000 [29560.674272] RDX: 0000000000690001 RSI: 0000000000b10001 RDI: ffff9daab5554478 [29560.674273] RBP: ffff9dba18707c70 R08: ffff9dbf3de1b780 R09: ffff9dbf3df1b780 [29560.674274] R10: 0000000000000000 R11: fffff90309653500 R12: ffff9dba18707c38 [29560.674275] R13: ffff9dae4d38cde0 R14: ffff9dacfdf603f8 R15: ffff9dae4d38cec8 [29560.674277] FS: 0000000000000000(0000) GS:ffff9dbf3de00000(0000) knlGS:0000000000000000 [29560.674278] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [29560.674279] CR2: 00007fc89e18c000 CR3: 0000001e0b6ca000 CR4: 00000000003607e0 [29560.674280] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [29560.674281] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [29560.674282] Call Trace: [29560.674288] [] queued_spin_lock_slowpath+0xb/0xf [29560.674292] [] _raw_spin_lock+0x20/0x30 [29560.674329] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [29560.674340] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [29560.674368] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [29560.674382] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [29560.674397] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [29560.674412] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [29560.674427] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [29560.674430] [] ? wake_up_state+0x20/0x20 [29560.674444] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [29560.674447] [] kthread+0xd1/0xe0 [29560.674449] [] ? insert_kthread_work+0x40/0x40 [29560.674451] [] ret_from_fork_nospec_begin+0x7/0x21 [29560.674453] [] ? insert_kthread_work+0x40/0x40 [29560.674454] Code: 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 85 c0 74 21 83 f8 03 75 10 eb 1a 66 2e 0f 1f 84 00 00 00 00 00 85 c0 74 0c f3 90 8b 17 <0f> b7 c2 83 f8 03 75 f0 be 01 00 00 00 eb 15 66 0f 1f 84 00 00 [29560.680139] NMI watchdog: BUG: soft lockup - CPU#15 stuck for 22s! [ldlm_bl_07:187140] [29560.681205] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg [29560.681238] i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [29560.681259] CPU: 15 PID: 187140 Comm: ldlm_bl_07 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [29560.681260] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [29560.681262] task: ffff9da3d6a1e180 ti: ffff9dbc05a40000 task.ti: ffff9dbc05a40000 [29560.681263] RIP: 0010:[] [] ldlm_kms_shift_cb+0x48/0x220 [ptlrpc] [29560.681289] RSP: 0018:ffff9dbc05a43c18 EFLAGS: 00000202 [29560.681290] RAX: ffff9da5f9ead7c0 RBX: ffff9db83f66da00 RCX: 0000000000000000 [29560.681291] RDX: ffff9da5f9ead9c8 RSI: ffff9dbc05a43c68 RDI: ffff9dabdabab100 [29560.681292] RBP: ffff9dbc05a43c28 R08: 0000000000000000 R09: ffff9dbf3e09b780 [29560.681294] R10: 0000000000000000 R11: fffff902e747c000 R12: ffff9db1feb757a8 [29560.681295] R13: 0000000019f89800 R14: 00000000c118f892 R15: ffff9dbc05a43ba8 [29560.681296] FS: 0000000000000000(0000) GS:ffff9dbf3dec0000(0000) knlGS:0000000000000000 [29560.681298] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [29560.681299] CR2: 00007f9ceb74d9e0 CR3: 000000153ac10000 CR4: 00000000003607e0 [29560.681300] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [29560.681301] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [29560.681302] Call Trace: [29560.681317] [] ? ldlm_extent_shift_kms+0x1b0/0x1b0 [ptlrpc] [29560.681335] [] interval_iterate_reverse+0x53/0x270 [ptlrpc] [29560.681349] [] ldlm_extent_shift_kms+0xa2/0x1b0 [ptlrpc] [29560.681353] [] ? remove_waiter+0x66/0x126 [29560.681362] [] osc_ldlm_blocking_ast+0x306/0x3a0 [osc] [29560.681375] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [29560.681396] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [29560.681410] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [29560.681425] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [29560.681441] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [29560.681444] [] ? wake_up_state+0x20/0x20 [29560.681459] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [29560.681461] [] kthread+0xd1/0xe0 [29560.681463] [] ? insert_kthread_work+0x40/0x40 [29560.681466] [] ret_from_fork_nospec_begin+0x7/0x21 [29560.681468] [] ? insert_kthread_work+0x40/0x40 [29560.681469] Code: f6 05 5a b1 c5 ff 01 0f 85 06 01 00 00 b9 01 00 00 00 49 8b 54 24 38 49 83 c4 38 49 39 d4 48 8d 82 f8 fd ff ff 0f 84 c0 00 00 00 <48> be 00 00 00 00 00 04 00 00 48 85 b2 f0 fe ff ff 74 20 48 8b [29560.698139] NMI watchdog: BUG: soft lockup - CPU#22 stuck for 22s! [ldlm_bl_23:191094] [29560.699011] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg [29560.699044] i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [29560.699065] CPU: 22 PID: 191094 Comm: ldlm_bl_23 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [29560.699067] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [29560.699068] task: ffff9dbf3a42b0c0 ti: ffff9dbeb0218000 task.ti: ffff9dbeb0218000 [29560.699069] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [29560.699076] RSP: 0018:ffff9dbeb021bc70 EFLAGS: 00000246 [29560.699077] RAX: 0000000000000000 RBX: ffff9dab7cece1c0 RCX: 0000000000b10000 [29560.699078] RDX: ffff9dbf3df1b780 RSI: 0000000000810001 RDI: ffff9daab5554478 [29560.699079] RBP: ffff9dbeb021bc70 R08: ffff9dbf3e09b780 R09: 0000000000000000 [29560.699080] R10: 0000000000000000 R11: fffff902f06ae100 R12: ffff9dbeb021bc38 [29560.699080] R13: ffff9db66ceb0000 R14: ffff9db016669e68 R15: ffff9db66ceb00e8 [29560.699082] FS: 0000000000000000(0000) GS:ffff9dbf3e080000(0000) knlGS:0000000000000000 [29560.699083] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [29560.699084] CR2: 00007feadef5b9e0 CR3: 000000153ac10000 CR4: 00000000003607e0 [29560.699085] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [29560.699086] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [29560.699087] Call Trace: [29560.699091] [] queued_spin_lock_slowpath+0xb/0xf [29560.699094] [] _raw_spin_lock+0x20/0x30 [29560.699123] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [29560.699132] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [29560.699156] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [29560.699170] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [29560.699185] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [29560.699199] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [29560.699213] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [29560.699216] [] ? wake_up_state+0x20/0x20 [29560.699229] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [29560.699231] [] kthread+0xd1/0xe0 [29560.699233] [] ? insert_kthread_work+0x40/0x40 [29560.699236] [] ret_from_fork_nospec_begin+0x7/0x21 [29560.699237] [] ? insert_kthread_work+0x40/0x40 [29560.699238] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [50984.874658] Lustre: DEBUG MARKER: Thu Sep 19 00:07:55 2019 [50988.230760] Lustre: DEBUG MARKER: Thu Sep 19 00:07:58 2019 [51321.088908] Lustre: 112021:0:(client.c:2210:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1568819604/real 1568819608] req@ffff9da26aa5c380 x1645001072026240/t0(0) o103->scratch0-OST0002-osc-ffff9dbd6427e800@10.0.10.176@o2ib10:17/18 lens 328/224 e 0 to 1 dl 1568819611 ref 1 fl Rpc:RXQ/0/ffffffff rc 0/-1 job:'' [51321.088919] Lustre: scratch0-OST0002-osc-ffff9dbd6427e800: Connection to scratch0-OST0002 (at 10.0.10.176@o2ib10) was lost; in progress operations using this service will wait for recovery to complete [51323.369116] Lustre: 112018:0:(client.c:2210:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1568819604/real 1568819608] req@ffff9dad6428ad00 x1645001072067712/t0(0) o103->scratch0-OST0003-osc-ffff9dbd6427e800@10.0.10.176@o2ib10:17/18 lens 328/224 e 0 to 1 dl 1568819611 ref 1 fl Rpc:RXQ/0/ffffffff rc 0/-1 job:'' [51323.369121] Lustre: 112018:0:(client.c:2210:ptlrpc_expire_one_request()) Skipped 113 previous similar messages [51324.712912] Lustre: 112035:0:(client.c:2210:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1568819604/real 1568819604] req@ffff9db4e1aa2400 x1645001072017216/t0(0) o103->scratch0-OST0005-osc-ffff9dbd6427e800@10.0.10.177@o2ib10:17/18 lens 328/224 e 0 to 1 dl 1568819611 ref 1 fl Rpc:RXQ/0/ffffffff rc 0/-1 job:'' [51324.712917] Lustre: 112035:0:(client.c:2210:ptlrpc_expire_one_request()) Skipped 83 previous similar messages [51324.712962] Lustre: scratch0-OST0007-osc-ffff9dbd6427e800: Connection to scratch0-OST0007 (at 10.0.10.178@o2ib10) was lost; in progress operations using this service will wait for recovery to complete [51324.712964] Lustre: Skipped 3 previous similar messages [51324.716864] Lustre: scratch0-OST0002-osc-ffff9dbd6427e800: Connection restored to 10.0.10.176@o2ib10 (at 10.0.10.176@o2ib10) [51325.762323] Lustre: scratch0-OST0001-osc-ffff9dbd6427e800: Connection to scratch0-OST0001 (at 10.0.10.175@o2ib10) was lost; in progress operations using this service will wait for recovery to complete [51327.428727] Lustre: scratch0-OST0007-osc-ffff9dbd6427e800: Connection restored to 10.0.10.178@o2ib10 (at 10.0.10.178@o2ib10) [51327.428730] Lustre: Skipped 3 previous similar messages [51328.518170] NMI watchdog: BUG: soft lockup - CPU#11 stuck for 22s! [ldlm_bl_42:190072] [51328.519160] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg [51328.519202] i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [51328.519229] CPU: 11 PID: 190072 Comm: ldlm_bl_42 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [51328.519230] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [51328.519232] task: ffff9dbf386eb0c0 ti: ffff9daebbf00000 task.ti: ffff9daebbf00000 [51328.519233] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x158/0x200 [51328.519243] RSP: 0018:ffff9daebbf03d40 EFLAGS: 00000202 [51328.519244] RAX: 0000000000000001 RBX: 0000000000000000 RCX: 0000000000590000 [51328.519245] RDX: 0000000000290001 RSI: 0000000000b10101 RDI: ffff9dbe0e79afdc [51328.519246] RBP: ffff9daebbf03d40 R08: ffff9daf3e8db780 R09: ffff9dbf3e01b780 [51328.519247] R10: 0000000000000001 R11: fffff90337b0dd80 R12: ffff9dbcec376e80 [51328.519248] R13: 00000000b41dc05d R14: 0000000000000246 R15: ffff9db5f1ff6d08 [51328.519250] FS: 0000000000000000(0000) GS:ffff9daf3e8c0000(0000) knlGS:0000000000000000 [51328.519251] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [51328.519252] CR2: 0000000000b4c200 CR3: 000000153ac10000 CR4: 00000000003607e0 [51328.519254] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [51328.519255] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [51328.519255] Call Trace: [51328.519263] [] queued_spin_lock_slowpath+0xb/0xf [51328.519268] [] _raw_spin_lock+0x20/0x30 [51328.519300] [] lock_res_and_lock+0x2c/0x50 [ptlrpc] [51328.519315] [] ldlm_cli_cancel_local+0x7a/0x3f0 [ptlrpc] [51328.519330] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [51328.519345] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [51328.519349] [] ? wake_up_state+0x20/0x20 [51328.519363] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [51328.519367] [] kthread+0xd1/0xe0 [51328.519369] [] ? insert_kthread_work+0x40/0x40 [51328.519372] [] ret_from_fork_nospec_begin+0x7/0x21 [51328.519373] [] ? insert_kthread_work+0x40/0x40 [51328.519374] Code: 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 85 c0 74 21 83 f8 03 75 10 eb 1a 66 2e 0f 1f 84 00 00 00 00 00 85 c0 74 0c f3 90 8b 17 <0f> b7 c2 83 f8 03 75 f0 be 01 00 00 00 eb 15 66 0f 1f 84 00 00 [51330.381331] Lustre: scratch0-OST0000-osc-ffff9dbd6427e800: Connection restored to 10.0.10.175@o2ib10 (at 10.0.10.175@o2ib10) [51330.703181] Lustre: 112027:0:(client.c:2210:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1568819604/real 1568819604] req@ffff9dada72dcc80 x1645001072017856/t0(0) o103->scratch0-OST0003-osc-ffff9dbd6427e800@10.0.10.176@o2ib10:17/18 lens 328/224 e 0 to 1 dl 1568819611 ref 1 fl Rpc:RXQ/0/ffffffff rc 0/-1 job:'' [51330.703185] Lustre: 112027:0:(client.c:2210:ptlrpc_expire_one_request()) Skipped 10 previous similar messages [51333.025074] Lustre: scratch0-OST0002-osc-ffff9dbd6427e800: Connection to scratch0-OST0002 (at 10.0.10.176@o2ib10) was lost; in progress operations using this service will wait for recovery to complete [51333.025078] Lustre: Skipped 1 previous similar message [51334.070811] Lustre: scratch0-OST0005-osc-ffff9dbd6427e800: Connection restored to 10.0.10.177@o2ib10 (at 10.0.10.177@o2ib10) [51334.070815] Lustre: Skipped 1 previous similar message [51336.497210] NMI watchdog: BUG: soft lockup - CPU#2 stuck for 22s! [kworker/2:2:187086] [51336.498206] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg [51336.498247] i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [51336.498274] CPU: 2 PID: 187086 Comm: kworker/2:2 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [51336.498275] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [51336.498313] Workqueue: events ldlm_pools_recalc_task [ptlrpc] [51336.498314] task: ffff9daf3d7e8000 ti: ffff9d9f10d54000 task.ti: ffff9d9f10d54000 [51336.498316] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [51336.498323] RSP: 0018:ffff9d9f10d57c70 EFLAGS: 00000246 [51336.498324] RAX: 0000000000000000 RBX: ffffffffffffff10 RCX: 0000000000110000 [51336.498325] RDX: ffff9daf3e71b780 RSI: 0000000000210101 RDI: ffff9da1f812123c [51336.498326] RBP: ffff9d9f10d57c70 R08: ffff9daf3e69b780 R09: 0000000000000000 [51336.498327] R10: 0000fbc2a60f46c0 R11: 0000000000000000 R12: 0000000000190001 [51336.498328] R13: ffff9daf3e6db780 R14: 0000000000110000 R15: 0000000000000000 [51336.498329] FS: 0000000000000000(0000) GS:ffff9daf3e680000(0000) knlGS:0000000000000000 [51336.498330] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [51336.498331] CR2: 0000000000b24de0 CR3: 000000103ada2000 CR4: 00000000003607e0 [51336.498333] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [51336.498334] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [51336.498335] Call Trace: [51336.498341] [] queued_spin_lock_slowpath+0xb/0xf [51336.498346] [] _raw_spin_lock+0x20/0x30 [51336.498359] [] lock_res_and_lock+0x1f/0x50 [ptlrpc] [51336.498373] [] ldlm_prepare_lru_list+0x228/0x4c0 [ptlrpc] [51336.498386] [] ? ldlm_iter_helper+0x20/0x20 [ptlrpc] [51336.498400] [] ldlm_cancel_lru+0x61/0x170 [ptlrpc] [51336.498415] [] ldlm_cli_pool_recalc+0x18e/0x210 [ptlrpc] [51336.498429] [] ldlm_pool_recalc+0x10c/0x1f0 [ptlrpc] [51336.498443] [] ldlm_pools_recalc_task+0x104/0x190 [ptlrpc] [51336.498447] [] process_one_work+0x17f/0x440 [51336.498449] [] worker_thread+0x126/0x3c0 [51336.498451] [] ? manage_workers.isra.25+0x2a0/0x2a0 [51336.498453] [] kthread+0xd1/0xe0 [51336.498455] [] ? insert_kthread_work+0x40/0x40 [51336.498458] [] ret_from_fork_nospec_begin+0x7/0x21 [51336.498459] [] ? insert_kthread_work+0x40/0x40 [51336.498460] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [51336.755863] Lustre: DEBUG MARKER: Thu Sep 19 00:13:47 2019 [51339.418337] Lustre: 112028:0:(client.c:2210:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1568819608/real 1568819611] req@ffff9dabfd21f980 x1645001072071872/t0(0) o103->scratch0-OST0006-osc-ffff9dbd6427e800@10.0.10.178@o2ib10:17/18 lens 328/224 e 0 to 1 dl 1568819617 ref 1 fl Rpc:RXQ/0/ffffffff rc 0/-1 job:'' [51339.418342] Lustre: 112028:0:(client.c:2210:ptlrpc_expire_one_request()) Skipped 24 previous similar messages [51339.602027] Lustre: scratch0-OST0003-osc-ffff9dbd6427e800: Connection to scratch0-OST0003 (at 10.0.10.176@o2ib10) was lost; in progress operations using this service will wait for recovery to complete [51339.602031] Lustre: Skipped 3 previous similar messages [51345.196124] Lustre: scratch0-OST0003-osc-ffff9dbd6427e800: Connection restored to 10.0.10.176@o2ib10 (at 10.0.10.176@o2ib10) [51345.196128] Lustre: Skipped 3 previous similar messages [51349.954319] Lustre: 112031:0:(client.c:2210:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1568819618/real 1568819618] req@ffff9dab21951b00 x1645001072137792/t0(0) o103->scratch0-OST0005-osc-ffff9dbd6427e800@10.0.10.177@o2ib10:17/18 lens 328/224 e 0 to 1 dl 1568819627 ref 1 fl Rpc:RXQ/0/ffffffff rc 0/-1 job:'' [51349.954324] Lustre: 112031:0:(client.c:2210:ptlrpc_expire_one_request()) Skipped 62 previous similar messages [51354.377220] Lustre: scratch0-OST0000-osc-ffff9dbd6427e800: Connection restored to 10.0.10.175@o2ib10 (at 10.0.10.175@o2ib10) [51354.377225] Lustre: Skipped 1 previous similar message [51356.518316] NMI watchdog: BUG: soft lockup - CPU#11 stuck for 21s! [ldlm_bl_84:3788] [51356.519313] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg [51356.519356] i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [51356.519382] CPU: 11 PID: 3788 Comm: ldlm_bl_84 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [51356.519384] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [51356.519386] task: ffff9dba47b4a080 ti: ffff9daa17830000 task.ti: ffff9daa17830000 [51356.519387] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [51356.519397] RSP: 0018:ffff9daa17833c28 EFLAGS: 00000246 [51356.519398] RAX: 0000000000000000 RBX: 0000000000190000 RCX: 0000000000590000 [51356.519399] RDX: ffff9daf3e81b780 RSI: 0000000000410101 RDI: ffff9daa5fdce57c [51356.519400] RBP: ffff9daa17833c28 R08: ffff9daf3e8db780 R09: 0000000000000000 [51356.519401] R10: 00000000df02ba01 R11: fffff902ff7c0a80 R12: 0000000000590000 [51356.519402] R13: 0000000000000000 R14: ffff9daf3e8db780 R15: 0000000000000000 [51356.519404] FS: 0000000000000000(0000) GS:ffff9daf3e8c0000(0000) knlGS:0000000000000000 [51356.519405] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [51356.519406] CR2: 00007f376a05f000 CR3: 0000002036a08000 CR4: 00000000003607e0 [51356.519407] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [51356.519408] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [51356.519409] Call Trace: [51356.519416] [] queued_spin_lock_slowpath+0xb/0xf [51356.519422] [] _raw_spin_lock+0x20/0x30 [51356.519452] [] lock_res_and_lock+0x1f/0x50 [ptlrpc] [51356.519467] [] ldlm_prepare_lru_list+0x228/0x4c0 [ptlrpc] [51356.519481] [] ? ldlm_iter_helper+0x20/0x20 [ptlrpc] [51356.519496] [] ldlm_cancel_lru_local+0x1a/0x30 [ptlrpc] [51356.519510] [] ldlm_cli_cancel+0x216/0x650 [ptlrpc] [51356.519513] [] ? __slab_free+0x81/0x2f0 [51356.519523] [] osc_ldlm_blocking_ast+0x17a/0x3a0 [osc] [51356.519540] [] ldlm_handle_bl_callback+0xed/0x4e0 [ptlrpc] [51356.519555] [] ldlm_bl_thread_main+0x800/0xa40 [ptlrpc] [51356.519571] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [51356.519576] [] kthread+0xd1/0xe0 [51356.519578] [] ? insert_kthread_work+0x40/0x40 [51356.519581] [] ret_from_fork_nospec_begin+0x7/0x21 [51356.519583] [] ? insert_kthread_work+0x40/0x40 [51356.519584] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [51360.127913] Lustre: scratch0-OST0007-osc-ffff9dbd6427e800: Connection to scratch0-OST0007 (at 10.0.10.178@o2ib10) was lost; in progress operations using this service will wait for recovery to complete [51360.127917] Lustre: Skipped 3 previous similar messages [51364.497357] NMI watchdog: BUG: soft lockup - CPU#2 stuck for 23s! [kworker/2:2:187086] [51364.498362] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg [51364.498404] i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [51364.498430] CPU: 2 PID: 187086 Comm: kworker/2:2 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [51364.498432] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [51364.498468] Workqueue: events ldlm_pools_recalc_task [ptlrpc] [51364.498469] task: ffff9daf3d7e8000 ti: ffff9d9f10d54000 task.ti: ffff9d9f10d54000 [51364.498471] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [51364.498477] RSP: 0018:ffff9d9f10d57c70 EFLAGS: 00000246 [51364.498478] RAX: 0000000000000000 RBX: ffffffffc1056c10 RCX: 0000000000110000 [51364.498479] RDX: ffff9dbf3df1b780 RSI: 0000000000810101 RDI: ffff9db9af28827c [51364.498480] RBP: ffff9d9f10d57c70 R08: ffff9daf3e69b780 R09: 0000000000000000 [51364.498481] R10: ffffffffc1056c10 R11: 0000000000000000 R12: 0000000000310001 [51364.498482] R13: ffff9d9f10d57c60 R14: 0000000000110000 R15: 0000000000000001 [51364.498484] FS: 0000000000000000(0000) GS:ffff9daf3e680000(0000) knlGS:0000000000000000 [51364.498485] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [51364.498486] CR2: 00007ffc108b9fa8 CR3: 000000101d268000 CR4: 00000000003607e0 [51364.498487] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [51364.498488] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [51364.498489] Call Trace: [51364.498496] [] queued_spin_lock_slowpath+0xb/0xf [51364.498501] [] _raw_spin_lock+0x20/0x30 [51364.498514] [] lock_res_and_lock+0x1f/0x50 [ptlrpc] [51364.498529] [] ldlm_prepare_lru_list+0x228/0x4c0 [ptlrpc] [51364.498542] [] ? ldlm_iter_helper+0x20/0x20 [ptlrpc] [51364.498556] [] ldlm_cancel_lru+0x61/0x170 [ptlrpc] [51364.498571] [] ldlm_cli_pool_recalc+0x18e/0x210 [ptlrpc] [51364.498585] [] ldlm_pool_recalc+0x10c/0x1f0 [ptlrpc] [51364.498599] [] ldlm_pools_recalc_task+0x104/0x190 [ptlrpc] [51364.498603] [] process_one_work+0x17f/0x440 [51364.498605] [] worker_thread+0x126/0x3c0 [51364.498607] [] ? manage_workers.isra.25+0x2a0/0x2a0 [51364.498610] [] kthread+0xd1/0xe0 [51364.498611] [] ? insert_kthread_work+0x40/0x40 [51364.498615] [] ret_from_fork_nospec_begin+0x7/0x21 [51364.498616] [] ? insert_kthread_work+0x40/0x40 [51364.498617] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [51366.067015] Lustre: 112039:0:(client.c:2210:ptlrpc_expire_one_request()) @@@ Request sent has timed out for slow reply: [sent 1568819641/real 1568819643] req@ffff9dbabf4be300 x1645001072264000/t0(0) o103->scratch0-OST0007-osc-ffff9dbd6427e800@10.0.10.178@o2ib10:17/18 lens 328/224 e 0 to 1 dl 1568819648 ref 1 fl Rpc:RXQ/0/ffffffff rc 0/-1 job:'' [51366.067021] Lustre: 112039:0:(client.c:2210:ptlrpc_expire_one_request()) Skipped 259 previous similar messages [51959.105295] Lustre: DEBUG MARKER: Thu Sep 19 00:24:09 2019 [51967.614957] Lustre: DEBUG MARKER: Thu Sep 19 00:24:17 2019 [51969.099003] Lustre: DEBUG MARKER: Thu Sep 19 00:24:19 2019 [51970.347039] Lustre: DEBUG MARKER: Thu Sep 19 00:24:20 2019 [51971.659039] Lustre: DEBUG MARKER: Thu Sep 19 00:24:21 2019 [51973.114118] Lustre: DEBUG MARKER: Thu Sep 19 00:24:23 2019 [51974.354160] Lustre: DEBUG MARKER: Thu Sep 19 00:24:24 2019 [51975.515217] Lustre: DEBUG MARKER: Thu Sep 19 00:24:25 2019 [51980.235313] Lustre: DEBUG MARKER: Thu Sep 19 00:24:30 2019 [52001.202894] Lustre: DEBUG MARKER: Thu Sep 19 00:24:51 2019 [52012.772885] Lustre: DEBUG MARKER: Thu Sep 19 00:25:03 2019 [52014.808389] Lustre: DEBUG MARKER: Thu Sep 19 00:25:05 2019 [52029.529103] Lustre: DEBUG MARKER: Thu Sep 19 00:25:19 2019 [52221.779274] bash (11156): drop_caches: 3 [52269.367200] Lustre: DEBUG MARKER: Thu Sep 19 00:29:19 2019 [85211.142420] Lustre: Unmounted scratch0-client [85232.771322] LNet: Removed LNI 10.0.13.150@o2ib10 [85536.116479] LNet: HW NUMA nodes: 2, HW CPU cores: 24, npartitions: 2 [85536.118027] alg: No test for adler32 (adler32-zlib) [85536.917540] Lustre: Lustre: Build Version: 2.12.56_216_g13a187f [85536.996773] LNet: Using FMR for registration [85537.005213] LNet: Added LNI 10.0.13.150@o2ib10 [8/256/0/180] [85538.207399] Lustre: Mounted scratch0-client [85661.952401] Lustre: Unmounted scratch0-client [85662.277755] Lustre: Mounted scratch0-client [91977.977376] Lustre: Unmounted scratch0-client [91991.600911] Lustre: Mounted scratch0-client [119127.609944] Lustre: Unmounted scratch0-client [119133.979947] LNet: Removed LNI 10.0.13.150@o2ib10 [131669.759971] LNet: HW NUMA nodes: 2, HW CPU cores: 24, npartitions: 2 [131669.761129] alg: No test for adler32 (adler32-zlib) [131670.550845] Lustre: Lustre: Build Version: 2.12.56_216_g13a187f [131670.614045] LNet: Using FMR for registration [131670.622257] LNet: Added LNI 10.0.13.150@o2ib10 [8/256/0/180] [131671.808139] Lustre: Mounted scratch0-client [131961.219639] Lustre: Unmounted scratch0-client [131969.631208] LNet: Removed LNI 10.0.13.150@o2ib10 [132126.073926] LNet: HW NUMA nodes: 2, HW CPU cores: 24, npartitions: 2 [132126.075136] alg: No test for adler32 (adler32-zlib) [132126.864374] Lustre: Lustre: Build Version: 2.12.58_57_g6e0d014 [132126.928964] LNet: Using FMR for registration [132126.937213] LNet: Added LNI 10.0.13.150@o2ib10 [8/256/0/180] [132128.121884] Lustre: Mounted scratch0-client [140407.378967] Lustre: DEBUG MARKER: Fri Sep 20 00:58:17 2019 [140411.180780] Lustre: DEBUG MARKER: start [140488.163890] Lustre: DEBUG MARKER: start [141942.683387] Lustre: DEBUG MARKER: start h01.m23.s53 [141978.985456] Lustre: DEBUG MARKER: start h01.m24.s29 [142043.559384] Lustre: DEBUG MARKER: start h01.m25.s34 [142104.526209] Lustre: DEBUG MARKER: start h01.m26.s35 [142165.014378] Lustre: DEBUG MARKER: start h01.m27.s35 [142224.423200] Lustre: DEBUG MARKER: start h01.m28.s34 [142286.454987] Lustre: DEBUG MARKER: start h01.m29.s37 [142348.971163] Lustre: DEBUG MARKER: start h01.m30.s39 [142412.745757] Lustre: DEBUG MARKER: start h01.m31.s43 [142473.180338] Lustre: DEBUG MARKER: start h01.m32.s43 [142533.287211] Lustre: DEBUG MARKER: start h01.m33.s43 [142594.113117] Lustre: DEBUG MARKER: start h01.m34.s44 [146020.165338] Lustre: DEBUG MARKER: start h02.m31.s50 [146081.666390] Lustre: DEBUG MARKER: start h02.m32.s52 [146150.186239] Lustre: DEBUG MARKER: start h02.m34.s00 [146211.725959] Lustre: DEBUG MARKER: start h02.m35.s02 [146272.812413] Lustre: DEBUG MARKER: start h02.m36.s03 [146332.129656] Lustre: DEBUG MARKER: start h02.m37.s02 [146392.929901] Lustre: DEBUG MARKER: start h02.m38.s03 [146460.614796] Lustre: DEBUG MARKER: start h02.m39.s11 [146521.928754] Lustre: DEBUG MARKER: start h02.m40.s12 [146582.427043] Lustre: DEBUG MARKER: start h02.m41.s12 [146644.214234] Lustre: DEBUG MARKER: start h02.m42.s14 [146713.882446] Lustre: DEBUG MARKER: start h02.m43.s24 [146775.029916] Lustre: DEBUG MARKER: start h02.m44.s25 [146835.320498] Lustre: DEBUG MARKER: start h02.m45.s25 [146897.712566] Lustre: DEBUG MARKER: start h02.m46.s28 [146959.658237] Lustre: DEBUG MARKER: start h02.m47.s30 [147020.693439] Lustre: DEBUG MARKER: start h02.m48.s31 [147083.638525] Lustre: DEBUG MARKER: start h02.m49.s34 [147143.712112] Lustre: DEBUG MARKER: start h02.m50.s34 [150251.858766] Lustre: 0 MB is too small for debug buffer size, setting it to 192 MB. [152951.960201] bash (133657): drop_caches: 3 [172089.504665] Lustre: Unmounted scratch0-client [172095.739370] LNet: Removed LNI 10.0.13.150@o2ib10 [172490.809171] LNet: HW NUMA nodes: 2, HW CPU cores: 24, npartitions: 2 [172490.810515] alg: No test for adler32 (adler32-zlib) [172491.612594] Lustre: Lustre: Build Version: 2.12.56_91_g72479a5 [172491.688889] LNet: Using FMR for registration [172491.697567] LNet: Added LNI 10.0.13.150@o2ib10 [8/256/0/180] [172492.885054] Lustre: Mounted scratch0-client [176812.142717] Lustre: Unmounted scratch0-client [176822.039884] LNet: Removed LNI 10.0.13.150@o2ib10 [177188.384328] LNet: HW NUMA nodes: 2, HW CPU cores: 24, npartitions: 2 [177188.385480] alg: No test for adler32 (adler32-zlib) [177189.172969] Lustre: Lustre: Build Version: 2.12.58_57_g6e0d014 [177189.234187] LNet: Using FMR for registration [177189.242372] LNet: Added LNI 10.0.13.150@o2ib10 [8/256/0/180] [177190.427118] Lustre: Mounted scratch0-client [241389.575446] Lustre: Mounted scratch7-client [241413.021837] Lustre: Unmounted scratch7-client [241432.116469] Lustre: Mounted scratch7-client [242370.950511] Lustre: Unmounted scratch7-client [242370.950515] Lustre: Skipped 1 previous similar message [255189.841799] Lustre: Mounted scratch7-client [255589.232422] Lustre: Unmounted scratch7-client [256246.678799] Lustre: Mounted scratch7-client [257579.498370] Lustre: Unmounted scratch7-client [259372.386929] Lustre: Mounted scratch7-client [259406.419766] Lustre: Unmounted scratch7-client [259416.755561] Lustre: Mounted scratch7-client [259513.758732] Lustre: Unmounted scratch7-client [259517.113244] Lustre: Mounted scratch7-client [338387.707635] Lustre: Unmounted scratch7-client [347835.031910] Lustre: Mounted scratch7-client [352442.694097] Lustre: Unmounted scratch7-client [352957.945739] Lustre: Mounted scratch7-client [479298.244413] Lustre: Unmounted scratch7-client [479304.024508] LNet: Removed LNI 10.0.13.150@o2ib10 [509409.657601] LNet: HW NUMA nodes: 2, HW CPU cores: 24, npartitions: 2 [509409.658861] alg: No test for adler32 (adler32-zlib) [509410.447193] Lustre: Lustre: Build Version: 2.12.58_57_g6e0d014 [509410.510874] LNet: Using FMR for registration [509410.519236] LNet: Added LNI 10.0.13.150@o2ib10 [8/256/0/180] [509416.510114] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [509416.510127] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [509417.510126] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [509420.510113] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 4 seconds [509420.510134] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 1 previous similar message [509420.511406] LNetError: 62878:0:(peer.c:3713:lnet_peer_ni_add_to_recoveryq_locked()) lpni 10.0.10.220@o2ib10 added to recovery queue. Health = 900 [509421.521078] Lustre: Mounted scratch7-client [509426.510097] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [509436.510000] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 10 seconds [509436.510009] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 506 [509446.509989] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 20 seconds [509480.509793] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [509480.509800] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 1 previous similar message [509480.509810] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [509492.509751] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [509511.509652] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [509526.509583] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [509526.509591] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 2 previous similar messages [509526.509604] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [509555.509397] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [509555.511835] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 1 previous similar message [509600.509218] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [509600.509253] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 4 previous similar messages [509600.509274] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [509600.511764] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 2 previous similar messages [509665.508829] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [509665.511240] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 4 previous similar messages [509730.508545] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [509730.508553] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 9 previous similar messages [509800.508119] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [509800.510507] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 9 previous similar messages [509995.507148] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [509995.507155] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 18 previous similar messages [510065.506736] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [510065.509171] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 16 previous similar messages [510300.150305] Lustre: Unmounted scratch7-client [510412.515429] Lustre: Mounted scratch7-client [510510.504488] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [510510.504496] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [510590.504020] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [510590.506614] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 25 previous similar messages [510796.728406] Lustre: Unmounted scratch7-client [511111.501136] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 8 seconds [511111.501144] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [511534.497828] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 0 [511534.500375] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 6 previous similar messages [511537.507238] Lustre: Mounted scratch7-client [511712.495917] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [511712.495924] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 45 previous similar messages [512140.493459] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [512140.496275] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 28 previous similar messages [512321.492096] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [512321.492100] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 44 previous similar messages [512636.755802] Lustre: Unmounted scratch7-client [512754.490077] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 334 [512754.492920] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 22 previous similar messages [512781.544776] Lustre: Mounted scratch7-client [512936.489282] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [512936.489289] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 46 previous similar messages [513355.492605] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [513355.495453] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 39 previous similar messages [513539.491063] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [513539.491067] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 44 previous similar messages [513989.495337] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [513989.498290] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 26 previous similar messages [514141.496640] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [514141.496647] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [514746.495011] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 3 seconds [514746.495016] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [515356.491842] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 36 seconds [515356.491847] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [515973.488443] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [515973.488450] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [516581.485362] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 6 seconds [516581.485367] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [517187.482255] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 10 seconds [517187.482260] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [517804.478931] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [517804.478939] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [518416.475351] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 35 seconds [518416.475358] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [519034.471991] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [519034.471998] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [519646.468809] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 36 seconds [519646.468813] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [520263.465758] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [520263.465766] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [520865.462926] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [520865.462934] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [521477.459973] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 10 seconds [521477.459978] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 43 previous similar messages [522086.456493] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 17 seconds [522086.456500] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [522701.453516] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 5 seconds [522701.453521] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 43 previous similar messages [523316.450306] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 18 seconds [523316.450311] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [523926.447197] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [523926.447205] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [524532.444042] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 4 seconds [524532.444049] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [525136.440904] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 6 seconds [525136.440908] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [525746.437627] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 14 seconds [525746.437631] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [526359.434472] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [526359.434476] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [526966.431223] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 5 seconds [526966.431227] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [527576.428029] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 13 seconds [527576.428036] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [528187.424809] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 22 seconds [528187.424817] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [528796.421557] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 3 seconds [528796.421564] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [529411.417857] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 16 seconds [529411.417865] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 43 previous similar messages [530022.414663] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [530022.414670] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [530625.411355] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [530625.411362] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 39 previous similar messages [530690.544293] Lustre: Unmounted scratch7-client [531231.408054] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 30 seconds [531231.408058] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [531842.404738] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 35 seconds [531842.404742] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [532446.401504] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 33 seconds [532446.401508] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [533056.398405] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 36 seconds [533056.398409] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [533661.395240] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 36 seconds [533661.395247] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [534266.392180] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 35 seconds [534266.392188] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [534872.389175] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 35 seconds [534872.389183] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [535476.386051] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 33 seconds [535476.386055] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [536081.382905] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 32 seconds [536081.382913] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [536696.379686] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 40 seconds [536696.379691] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [537301.376612] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 39 seconds [537301.376616] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [537912.373681] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 45 seconds [537912.373685] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [538516.370733] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 43 seconds [538516.370740] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [539126.368081] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 46 seconds [539126.368085] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [539741.365136] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 56 seconds [539741.365144] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [540351.362174] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 60 seconds [540351.362178] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [540961.359063] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 64 seconds [540961.359070] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [541572.355985] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 69 seconds [541572.355989] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [542182.352965] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 73 seconds [542182.352973] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [542796.349832] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 80 seconds [542796.349836] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [543401.346817] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 80 seconds [543401.346825] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [544006.343802] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 79 seconds [544006.343810] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [544611.340730] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 77 seconds [544611.340735] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [545217.337835] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 78 seconds [545217.337839] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [545826.334861] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 81 seconds [545826.334865] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [546431.331957] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 79 seconds [546431.331965] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [547036.329000] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 78 seconds [547036.329004] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [547641.326324] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 77 seconds [547641.326332] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [548252.323380] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 83 seconds [548252.323384] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [548866.320402] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 91 seconds [548866.320410] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [549471.317494] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 90 seconds [549471.317498] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [550076.314339] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 88 seconds [550076.314359] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [550263.794443] LustreError: 62909:0:(mgc_request.c:250:do_config_log_add()) MGC10.0.10.248@o2ib10: failed processing log, type 1: rc = -5 [550263.990187] Lustre: Mounted scratch0-client [550271.474451] LustreError: 62916:0:(mgc_request.c:598:do_requeue()) failed processing log: -5 [550286.885352] LustreError: 62916:0:(mgc_request.c:598:do_requeue()) failed processing log: -5 [550294.796368] LustreError: 15c-8: MGC10.0.10.248@o2ib10: Confguration from log scratch7-client failed from MGS -5. Communication error between node & MGS, a bad configuration, or other errors. See syslog for more info [550294.799097] Lustre: Unmounted scratch7-client [550294.799711] LustreError: 62909:0:(obd_mount.c:1669:lustre_fill_super()) Unable to mount (-5) [550392.312812] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [550392.315619] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 1 previous similar message [550476.312370] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [550476.315146] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 7 previous similar messages [550631.311637] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [550631.314356] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 13 previous similar messages [550684.311415] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [550684.311420] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 47 previous similar messages [551292.308610] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 19 seconds [551292.308614] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [551901.305885] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [551901.305892] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [552506.302787] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 4 seconds [552506.302794] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [553119.299427] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [553119.299449] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [553731.295080] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [553731.295084] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [554341.291612] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 11 seconds [554341.291616] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [554942.288373] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 6 seconds [554942.288380] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [555551.285182] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 13 seconds [555551.285186] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [556156.282042] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 8 seconds [556156.282046] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [556767.279447] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [556767.279455] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [557376.276405] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 7 seconds [557376.276413] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [557977.273552] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 6 seconds [557977.273560] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [558581.270968] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 8 seconds [558581.270973] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [559191.267919] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 13 seconds [559191.267923] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [559807.264725] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [559807.264730] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [559826.264598] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [559826.267300] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 5 previous similar messages [559872.264359] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [559872.266974] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 3 previous similar messages [559941.646316] bash (138233): drop_caches: 3 [560002.263741] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [560002.266441] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 3 previous similar messages [560161.263144] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [560161.265736] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 7 previous similar messages [560416.261660] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 11 seconds [560416.261667] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 47 previous similar messages [560646.260440] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [560646.263134] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 13 previous similar messages [561017.257979] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 21 seconds [561017.257987] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [561621.253790] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 12 seconds [561621.253798] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [562231.250510] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 20 seconds [562231.250518] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [562291.250145] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [562291.252884] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 3 previous similar messages [562376.249752] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [562376.252446] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 5 previous similar messages [562527.249149] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [562527.251817] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 9 previous similar messages [562839.247554] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [562839.247561] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 49 previous similar messages [563446.245372] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 6 seconds [563446.245376] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [564047.242158] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 5 seconds [564047.242166] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [564662.239211] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 17 seconds [564662.239219] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [565266.236171] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 19 seconds [565266.236175] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [565881.233050] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 7 seconds [565881.233054] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 43 previous similar messages [566406.230435] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [566406.233123] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 15 previous similar messages [566496.229985] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 20 seconds [566496.229992] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [567103.226911] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [567103.226918] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [567711.224142] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 6 seconds [567711.224147] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [568322.221033] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 15 seconds [568322.221037] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [568935.217644] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [568935.217648] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [569537.214788] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [569537.214795] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [570140.212194] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [570140.212199] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [570746.209059] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 5 seconds [570746.209066] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [571347.205586] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 4 seconds [571347.205593] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [571961.202556] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 16 seconds [571961.202561] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [572576.199646] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 4 seconds [572576.199651] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 43 previous similar messages [573181.196821] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 6 seconds [573181.196828] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [573786.193965] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 9 seconds [573786.193970] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [574387.191041] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 8 seconds [574387.191049] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [575001.188035] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 20 seconds [575001.188043] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [575611.185076] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 3 seconds [575611.185083] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [576216.182177] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 6 seconds [576216.182184] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [576821.179188] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 9 seconds [576821.179195] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [577427.176260] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 13 seconds [577427.176264] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [578042.173353] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [578042.173358] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [578651.170350] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 7 seconds [578651.170354] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [579261.167427] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 15 seconds [579261.167434] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [579876.164427] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 3 seconds [579876.164447] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [580490.161649] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [580490.161657] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [581102.158774] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [581102.158778] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [581707.156020] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 3 seconds [581707.156027] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [582311.153196] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 4 seconds [582311.153201] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [582921.150363] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 12 seconds [582921.150370] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [583526.147521] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 7 seconds [583526.147529] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [584131.144630] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 5 seconds [584131.144637] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [584742.141709] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 2 seconds [584742.141713] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [585343.138840] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [585343.138845] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [585944.136010] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [585944.136015] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [586551.133084] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 2 seconds [586551.133091] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [587156.130156] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [587156.130161] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [587772.127006] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 11 seconds [587772.127014] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [588381.124035] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 3 seconds [588381.124040] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [588996.121102] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 16 seconds [588996.121107] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [589611.118170] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 4 seconds [589611.118177] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [590226.115192] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 17 seconds [590226.115196] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [590841.112471] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 5 seconds [590841.112476] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [591442.109590] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 3 seconds [591442.109594] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [592046.106746] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 5 seconds [592046.106754] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [592656.103926] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 13 seconds [592656.103930] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [593261.101079] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 16 seconds [593261.101085] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [593876.098160] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 4 seconds [593876.098165] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [594481.095295] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 7 seconds [594481.095302] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [595082.092514] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 6 seconds [595082.092518] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [595691.089628] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 13 seconds [595691.089635] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [596296.086749] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 15 seconds [596296.086756] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [596901.083833] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 18 seconds [596901.083837] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [597511.080971] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [597511.080975] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [598122.078038] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 10 seconds [598122.078046] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [598726.075183] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 12 seconds [598726.075191] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [599331.072359] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 15 seconds [599331.072366] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [599946.069454] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 3 seconds [599946.069459] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [600551.066561] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 5 seconds [600551.066569] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [601152.063690] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 4 seconds [601152.063711] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [601766.060812] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 16 seconds [601766.060817] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [602378.057840] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [602378.057845] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [602981.055010] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 2 seconds [602981.055017] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [603596.052072] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 15 seconds [603596.052081] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [604208.049063] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [604208.049067] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [604817.046154] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 7 seconds [604817.046158] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [605421.043146] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 8 seconds [605421.043151] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [606026.040257] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 11 seconds [606026.040264] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 40 previous similar messages [606636.037316] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 19 seconds [606636.037320] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [607244.034385] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [607244.034389] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [607861.031435] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [607861.031440] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [608467.028557] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [608467.028561] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [609075.025787] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [609075.025794] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [609681.022916] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 3 seconds [609681.022920] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [610285.020026] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [610285.020034] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [610891.017127] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [610891.017135] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [611342.227919] Lustre: Unmounted scratch0-client [611497.014261] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [611497.014268] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [612103.011345] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [612103.011353] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [612711.008442] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 3 seconds [612711.008449] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [613316.005564] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 2 seconds [613316.005571] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [613921.002659] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [613921.002663] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [614395.380169] Lustre: Mounted scratch0-client [614436.000261] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [614436.003182] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 1 previous similar message [614451.000165] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [614466.000098] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [614490.999961] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [614491.002725] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 1 previous similar message [614526.999801] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 0 seconds [614526.999806] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 42 previous similar messages [614895.998088] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [614896.000900] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 2 previous similar messages [614978.997762] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) ni 10.0.13.150@o2ib10 added to recovery queue. Health = 900 [614979.000465] LNetError: 95001:0:(lib-msg.c:481:lnet_handle_local_failure()) Skipped 5 previous similar messages [615132.996989] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Timed out tx for 10.0.10.220@o2ib10: 1 seconds [615132.996996] LNet: 95001:0:(o2iblnd_cb.c:3397:kiblnd_check_conns()) Skipped 41 previous similar messages [615355.091990] Lustre: Unmounted scratch0-client [615359.576872] LNet: 50548:0:(api-ni.c:2061:lnet_clear_zombies_nis_locked()) Waiting for zombie LNI 10.0.13.150@o2ib10 [615363.578846] LNet: Removed LNI 10.0.13.150@o2ib10 [618510.025050] LNet: HW NUMA nodes: 2, HW CPU cores: 24, npartitions: 2 [618510.026324] alg: No test for adler32 (adler32-zlib) [618510.825191] Lustre: Lustre: Build Version: 2.12.58_57_g6e0d014 [618510.904464] LNet: Using FMR for registration [618510.912592] LNet: Added LNI 10.0.13.150@o2ib10 [8/256/0/180] [618512.110815] Lustre: Mounted scratch0-client [619731.020393] Lustre: Unmounted scratch0-client [619736.462838] LNet: Removed LNI 10.0.13.150@o2ib10 [624845.831154] LNet: HW NUMA nodes: 2, HW CPU cores: 24, npartitions: 2 [624845.832522] alg: No test for adler32 (adler32-zlib) [624846.622137] Lustre: Lustre: Build Version: 2.12.58_57_g6e0d014 [624846.682904] LNet: Using FMR for registration [624846.692474] LNet: Added LNI 10.0.13.150@o2ib10 [8/256/0/180] [624847.882888] Lustre: Mounted scratch0-client [630019.528048] Lustre: Unmounted scratch0-client [630479.635775] LNet: Removed LNI 10.0.13.150@o2ib10 [1144826.412973] br0: port 1(vxlan0) entered blocking state [1144826.412977] br0: port 1(vxlan0) entered disabled state [1144826.413173] device vxlan0 entered promiscuous mode [1144826.413828] br0: port 1(vxlan0) entered blocking state [1144826.413831] br0: port 1(vxlan0) entered forwarding state [1144826.501873] br0: port 2(veth0) entered blocking state [1144826.501877] br0: port 2(veth0) entered disabled state [1144826.502054] device veth0 entered promiscuous mode [1144826.502271] IPv6: ADDRCONF(NETDEV_UP): veth0: link is not ready [1144826.502275] br0: port 2(veth0) entered blocking state [1144826.502277] br0: port 2(veth0) entered forwarding state [1144826.598301] IPv6: ADDRCONF(NETDEV_CHANGE): veth0: link becomes ready [1144826.680713] IPv6: ADDRCONF(NETDEV_UP): docker_gwbridge: link is not ready [1144826.693545] docker_gwbridge: port 1(vethe26211f) entered blocking state [1144826.693547] docker_gwbridge: port 1(vethe26211f) entered disabled state [1144826.693685] device vethe26211f entered promiscuous mode [1144826.693905] IPv6: ADDRCONF(NETDEV_UP): vethe26211f: link is not ready [1144826.693910] docker_gwbridge: port 1(vethe26211f) entered blocking state [1144826.693912] docker_gwbridge: port 1(vethe26211f) entered forwarding state [1144826.693935] IPv6: ADDRCONF(NETDEV_CHANGE): docker_gwbridge: link becomes ready [1144826.694247] docker_gwbridge: port 1(vethe26211f) entered disabled state [1144826.724229] IPv6: ADDRCONF(NETDEV_CHANGE): vethe26211f: link becomes ready [1144826.724337] docker_gwbridge: port 1(vethe26211f) entered blocking state [1144826.724340] docker_gwbridge: port 1(vethe26211f) entered forwarding state [1153579.080104] ip_set: protocol 6 [1153579.814372] openvswitch: Open vSwitch switching datapath [1153579.815979] device datapath entered promiscuous mode [1153579.841726] weave: port 1(vethwedu) entered blocking state [1153579.841729] weave: port 1(vethwedu) entered disabled state [1153579.841892] device vethwedu entered promiscuous mode [1153579.842272] device vethwedu left promiscuous mode [1153579.842280] weave: port 1(vethwedu) entered disabled state [1153579.863679] weave: port 1(vethwe-bridge) entered blocking state [1153579.863688] weave: port 1(vethwe-bridge) entered disabled state [1153579.863836] device vethwe-bridge entered promiscuous mode [1153579.864198] IPv6: ADDRCONF(NETDEV_UP): vethwe-datapath: link is not ready [1153579.864426] device vethwe-datapath entered promiscuous mode [1153579.865206] IPv6: ADDRCONF(NETDEV_CHANGE): vethwe-datapath: link becomes ready [1153580.890532] weave: port 1(vethwe-bridge) entered blocking state [1153580.890536] weave: port 1(vethwe-bridge) entered forwarding state [1153580.890622] IPv6: ADDRCONF(NETDEV_UP): weave: link is not ready [1153580.892279] IPv6: ADDRCONF(NETDEV_CHANGE): weave: link becomes ready [1153580.895176] device vxlan-6784 entered promiscuous mode [1153600.163103] weave: port 2(vethweplc81a623) entered blocking state [1153600.163108] weave: port 2(vethweplc81a623) entered disabled state [1153600.163398] device vethweplc81a623 entered promiscuous mode [1153600.189152] IPv6: ADDRCONF(NETDEV_UP): vethweplc81a623: link is not ready [1153600.193978] IPv6: ADDRCONF(NETDEV_UP): eth0: link is not ready [1153600.193991] IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready [1153600.194130] IPv6: ADDRCONF(NETDEV_CHANGE): vethweplc81a623: link becomes ready [1153600.194291] weave: port 2(vethweplc81a623) entered blocking state [1153600.194294] weave: port 2(vethweplc81a623) entered forwarding state [1153844.324110] weave: port 2(vethweplc81a623) entered disabled state [1153844.336050] device vethweplc81a623 left promiscuous mode [1153844.336059] weave: port 2(vethweplc81a623) entered disabled state [1153985.941033] weave: port 2(vethwedu) entered blocking state [1153985.941038] weave: port 2(vethwedu) entered disabled state [1153985.941149] device vethwedu entered promiscuous mode [1153985.941435] device vethwedu left promiscuous mode [1153985.941442] weave: port 2(vethwedu) entered disabled state [1153987.006077] device vxlan-6784 left promiscuous mode [1153987.021253] device vxlan-6784 entered promiscuous mode [1153996.177575] weave: port 2(vethwepl38c85d7) entered blocking state [1153996.177579] weave: port 2(vethwepl38c85d7) entered disabled state [1153996.177785] device vethwepl38c85d7 entered promiscuous mode [1153996.209399] IPv6: ADDRCONF(NETDEV_UP): vethwepl38c85d7: link is not ready [1153996.213115] IPv6: ADDRCONF(NETDEV_UP): eth0: link is not ready [1153996.213125] IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready [1153996.213272] IPv6: ADDRCONF(NETDEV_CHANGE): vethwepl38c85d7: link becomes ready [1153996.213388] weave: port 2(vethwepl38c85d7) entered blocking state [1153996.213390] weave: port 2(vethwepl38c85d7) entered forwarding state [1153998.160155] weave: port 3(vethwepl7c88c76) entered blocking state [1153998.160159] weave: port 3(vethwepl7c88c76) entered disabled state [1153998.160387] device vethwepl7c88c76 entered promiscuous mode [1153998.187385] IPv6: ADDRCONF(NETDEV_UP): vethwepl7c88c76: link is not ready [1153998.191248] IPv6: ADDRCONF(NETDEV_UP): eth0: link is not ready [1153998.191266] IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready [1153998.191397] IPv6: ADDRCONF(NETDEV_CHANGE): vethwepl7c88c76: link becomes ready [1153998.191495] weave: port 3(vethwepl7c88c76) entered blocking state [1153998.191497] weave: port 3(vethwepl7c88c76) entered forwarding state [1154035.450691] weave: port 3(vethwepl7c88c76) entered disabled state [1154035.462576] device vethwepl7c88c76 left promiscuous mode [1154035.462585] weave: port 3(vethwepl7c88c76) entered disabled state [1154035.962170] weave: port 2(vethwepl38c85d7) entered disabled state [1154035.973129] device vethwepl38c85d7 left promiscuous mode [1154035.973139] weave: port 2(vethwepl38c85d7) entered disabled state [1154106.100029] weave: port 2(vethwedu) entered blocking state [1154106.100034] weave: port 2(vethwedu) entered disabled state [1154106.100152] device vethwedu entered promiscuous mode [1154106.100451] device vethwedu left promiscuous mode [1154106.100459] weave: port 2(vethwedu) entered disabled state [1154106.148044] device vxlan-6784 left promiscuous mode [1154106.167291] device vxlan-6784 entered promiscuous mode [1154110.853590] weave: port 2(vethwepld3126c8) entered blocking state [1154110.853595] weave: port 2(vethwepld3126c8) entered disabled state [1154110.853777] device vethwepld3126c8 entered promiscuous mode [1154110.880235] weave: port 3(vethwepl562e691) entered blocking state [1154110.880238] weave: port 3(vethwepl562e691) entered disabled state [1154110.880420] device vethwepl562e691 entered promiscuous mode [1154110.897725] IPv6: ADDRCONF(NETDEV_UP): vethwepld3126c8: link is not ready [1154110.904494] IPv6: ADDRCONF(NETDEV_UP): eth0: link is not ready [1154110.904504] IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready [1154110.904617] IPv6: ADDRCONF(NETDEV_CHANGE): vethwepld3126c8: link becomes ready [1154110.904770] weave: port 2(vethwepld3126c8) entered blocking state [1154110.904772] weave: port 2(vethwepld3126c8) entered forwarding state [1154110.905843] IPv6: ADDRCONF(NETDEV_UP): vethwepl562e691: link is not ready [1154110.905851] weave: port 3(vethwepl562e691) entered blocking state [1154110.905854] weave: port 3(vethwepl562e691) entered forwarding state [1154110.909646] IPv6: ADDRCONF(NETDEV_CHANGE): vethwepl562e691: link becomes ready [1154207.072248] weave: port 3(vethwepl562e691) entered disabled state [1154207.084274] device vethwepl562e691 left promiscuous mode [1154207.084284] weave: port 3(vethwepl562e691) entered disabled state [1154207.368524] weave: port 2(vethwepld3126c8) entered disabled state [1154207.380305] device vethwepld3126c8 left promiscuous mode [1154207.380315] weave: port 2(vethwepld3126c8) entered disabled state [1154240.267221] weave: port 2(vethwedu) entered blocking state [1154240.267225] weave: port 2(vethwedu) entered disabled state [1154240.267362] device vethwedu entered promiscuous mode [1154240.267623] device vethwedu left promiscuous mode [1154240.267631] weave: port 2(vethwedu) entered disabled state [1154241.344080] device vxlan-6784 left promiscuous mode [1154241.362719] device vxlan-6784 entered promiscuous mode [1154245.161690] weave: port 2(vethwepl2e155ca) entered blocking state [1154245.161694] weave: port 2(vethwepl2e155ca) entered disabled state [1154245.161901] device vethwepl2e155ca entered promiscuous mode [1154245.185567] weave: port 3(vethwepla4b770e) entered blocking state [1154245.185571] weave: port 3(vethwepla4b770e) entered disabled state [1154245.185730] device vethwepla4b770e entered promiscuous mode [1154245.199955] IPv6: ADDRCONF(NETDEV_UP): vethwepl2e155ca: link is not ready [1154245.209702] IPv6: ADDRCONF(NETDEV_UP): eth0: link is not ready [1154245.209732] IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready [1154245.209866] IPv6: ADDRCONF(NETDEV_CHANGE): vethwepl2e155ca: link becomes ready [1154245.210052] weave: port 2(vethwepl2e155ca) entered blocking state [1154245.210054] weave: port 2(vethwepl2e155ca) entered forwarding state [1154245.211680] IPv6: ADDRCONF(NETDEV_UP): vethwepla4b770e: link is not ready [1154245.211689] weave: port 3(vethwepla4b770e) entered blocking state [1154245.211691] weave: port 3(vethwepla4b770e) entered forwarding state [1154245.215681] IPv6: ADDRCONF(NETDEV_CHANGE): vethwepla4b770e: link becomes ready [1154395.449778] weave: port 3(vethwepla4b770e) entered disabled state [1154395.462191] device vethwepla4b770e left promiscuous mode [1154395.462201] weave: port 3(vethwepla4b770e) entered disabled state [1154395.736182] weave: port 2(vethwepl2e155ca) entered disabled state [1154395.748477] device vethwepl2e155ca left promiscuous mode [1154395.748486] weave: port 2(vethwepl2e155ca) entered disabled state [1154589.840767] weave: port 2(vethwedu) entered blocking state [1154589.840771] weave: port 2(vethwedu) entered disabled state [1154589.840896] device vethwedu entered promiscuous mode [1154589.841170] device vethwedu left promiscuous mode [1154589.841178] weave: port 2(vethwedu) entered disabled state [1154590.895429] device vxlan-6784 left promiscuous mode [1154590.919255] device vxlan-6784 entered promiscuous mode [1590939.911882] LNet: HW NUMA nodes: 2, HW CPU cores: 24, npartitions: 2 [1590939.913241] alg: No test for adler32 (adler32-zlib) [1590940.703446] Lustre: Lustre: Build Version: 2.12.58_145_gfcf219d [1590940.768113] LNet: 146625:0:(config.c:1641:lnet_inet_enumerate()) lnet: Ignoring interface dummy0: it's down [1590940.768132] LNet: Using FMR for registration [1590940.776872] LNet: Added LNI 10.0.13.150@o2ib10 [8/256/0/180] [1590941.962895] Lustre: Mounted ai400-client [1591631.040779] LustreError: 11-0: ai400-OST0007-osc-ffff9dbf39146800: operation ost_read to node 10.0.10.178@o2ib10 failed: rc = -107 [1591631.043559] Lustre: ai400-OST0007-osc-ffff9dbf39146800: Connection to ai400-OST0007 (at 10.0.10.178@o2ib10) was lost; in progress operations using this service will wait for recovery to complete [1591631.055366] LustreError: 167-0: ai400-OST0007-osc-ffff9dbf39146800: This client was evicted by ai400-OST0007; in progress operations using this service will fail. [1591632.354426] Lustre: ai400-OST0003-osc-ffff9dbf39146800: Connection to ai400-OST0003 (at 10.0.10.176@o2ib10) was lost; in progress operations using this service will wait for recovery to complete [1591632.354612] LustreError: 167-0: ai400-OST0003-osc-ffff9dbf39146800: This client was evicted by ai400-OST0003; in progress operations using this service will fail. [1591642.115121] Lustre: ai400-OST0004-osc-ffff9dbf39146800: Connection to ai400-OST0004 (at 10.0.10.177@o2ib10) was lost; in progress operations using this service will wait for recovery to complete [1591642.115311] LustreError: 167-0: ai400-OST0004-osc-ffff9dbf39146800: This client was evicted by ai400-OST0004; in progress operations using this service will fail. [1591651.483084] Lustre: ai400-OST0000-osc-ffff9dbf39146800: Connection to ai400-OST0000 (at 10.0.10.175@o2ib10) was lost; in progress operations using this service will wait for recovery to complete [1591651.483334] LustreError: 167-0: ai400-OST0000-osc-ffff9dbf39146800: This client was evicted by ai400-OST0000; in progress operations using this service will fail. [1591681.894887] NMI watchdog: BUG: soft lockup - CPU#17 stuck for 22s! [ldlm_bl_24:147752] [1591681.896689] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1591681.896729] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1591681.896772] CPU: 17 PID: 147752 Comm: ldlm_bl_24 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1591681.896773] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1591681.896775] task: ffff9da125ac30c0 ti: ffff9dae63ab4000 task.ti: ffff9dae63ab4000 [1591681.896777] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1591681.896785] RSP: 0018:ffff9dae63ab7c70 EFLAGS: 00000246 [1591681.896786] RAX: 0000000000000000 RBX: ffff9da3cc650c80 RCX: 0000000000890000 [1591681.896788] RDX: ffff9dbf3dfdb780 RSI: 0000000000990001 RDI: ffff9daf0aa24058 [1591681.896789] RBP: ffff9dae63ab7c70 R08: ffff9dbf3df5b780 R09: 0000000000000000 [1591681.896790] R10: 0000000000000000 R11: fffff9033c3c8c80 R12: ffff9dae63ab7c38 [1591681.896791] R13: ffff9dba6fe2c0e0 R14: ffff9dba6fff8a28 R15: ffff9dba6fe2c178 [1591681.896793] FS: 0000000000000000(0000) GS:ffff9dbf3df40000(0000) knlGS:0000000000000000 [1591681.896794] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1591681.896795] CR2: 00007f8d12f32000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1591681.896797] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1591681.896798] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1591681.896799] Call Trace: [1591681.896805] [] queued_spin_lock_slowpath+0xb/0xf [1591681.896810] [] _raw_spin_lock+0x20/0x30 [1591681.896838] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1591681.896848] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1591681.896873] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1591681.896887] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1591681.896903] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1591681.896918] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1591681.896934] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1591681.896937] [] ? wake_up_state+0x20/0x20 [1591681.896952] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1591681.896955] [] kthread+0xd1/0xe0 [1591681.896957] [] ? insert_kthread_work+0x40/0x40 [1591681.896961] [] ret_from_fork_nospec_begin+0x7/0x21 [1591681.896962] [] ? insert_kthread_work+0x40/0x40 [1591681.896964] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1591685.899888] NMI watchdog: BUG: soft lockup - CPU#19 stuck for 23s! [ldlm_bl_18:147615] [1591685.901348] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1591685.901387] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1591685.901428] CPU: 19 PID: 147615 Comm: ldlm_bl_18 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1591685.901429] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1591685.901431] task: ffff9daf3d329040 ti: ffff9da740348000 task.ti: ffff9da740348000 [1591685.901433] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1591685.901441] RSP: 0018:ffff9da74034bc70 EFLAGS: 00000246 [1591685.901442] RAX: 0000000000000000 RBX: ffff9daf18fe0f00 RCX: 0000000000990000 [1591685.901443] RDX: ffff9dbf3de1b780 RSI: 0000000000610001 RDI: ffff9daf0aa24058 [1591685.901444] RBP: ffff9da74034bc70 R08: ffff9dbf3dfdb780 R09: 0000000000000000 [1591685.901445] R10: 0000000000000000 R11: fffff9032f36d300 R12: ffff9da74034bc38 [1591685.901446] R13: ffff9dbc62b0a070 R14: ffff9db3cb611688 R15: ffff9dbc62b0a108 [1591685.901447] FS: 0000000000000000(0000) GS:ffff9dbf3dfc0000(0000) knlGS:0000000000000000 [1591685.901449] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1591685.901450] CR2: 0000000001c13080 CR3: 000000153ac10000 CR4: 00000000003607e0 [1591685.901451] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1591685.901452] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1591685.901453] Call Trace: [1591685.901459] [] queued_spin_lock_slowpath+0xb/0xf [1591685.901463] [] _raw_spin_lock+0x20/0x30 [1591685.901492] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1591685.901501] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1591685.901524] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1591685.901538] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1591685.901553] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1591685.901568] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1591685.901582] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1591685.901585] [] ? wake_up_state+0x20/0x20 [1591685.901599] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1591685.901602] [] kthread+0xd1/0xe0 [1591685.901604] [] ? insert_kthread_work+0x40/0x40 [1591685.901606] [] ret_from_fork_nospec_begin+0x7/0x21 [1591685.901608] [] ? insert_kthread_work+0x40/0x40 [1591685.901609] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1591689.797889] NMI watchdog: BUG: soft lockup - CPU#2 stuck for 22s! [ldlm_bl_19:147617] [1591689.799273] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1591689.799310] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1591689.799346] CPU: 2 PID: 147617 Comm: ldlm_bl_19 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1591689.799347] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1591689.799349] task: ffff9daf22fe0000 ti: ffff9daf18e40000 task.ti: ffff9daf18e40000 [1591689.799350] RIP: 0010:[] [] interval_last+0x2f/0x120 [ptlrpc] [1591689.799390] RSP: 0018:ffff9daf18e43c20 EFLAGS: 00000286 [1591689.799391] RAX: 0000000000000001 RBX: ffff9da33a115308 RCX: 0000000000000000 [1591689.799392] RDX: ffff9dbb05196400 RSI: 0000000171a08000 RDI: ffff9dac56dcb900 [1591689.799393] RBP: ffff9daf18e43c28 R08: 0000000000000000 R09: ffff9dbf3df9b780 [1591689.799394] R10: 0000000000000000 R11: fffff9032c4b9c80 R12: ffff9db85d2f2120 [1591689.799395] R13: ffff9db85d2f2108 R14: ffff9da33a115320 R15: 00000000ec160b51 [1591689.799397] FS: 0000000000000000(0000) GS:ffff9daf3e680000(0000) knlGS:0000000000000000 [1591689.799398] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1591689.799399] CR2: 0000000001642038 CR3: 000000153ac10000 CR4: 00000000003607e0 [1591689.799400] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1591689.799401] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1591689.799402] Call Trace: [1591689.799420] [] interval_iterate_reverse+0x7d/0x270 [ptlrpc] [1591689.799433] [] ldlm_extent_shift_kms+0xa2/0x1b0 [ptlrpc] [1591689.799438] [] ? remove_waiter+0x66/0x126 [1591689.799450] [] osc_ldlm_blocking_ast+0x306/0x3a0 [osc] [1591689.799462] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1591689.799489] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1591689.799502] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1591689.799515] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1591689.799529] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1591689.799532] [] ? wake_up_state+0x20/0x20 [1591689.799546] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1591689.799549] [] kthread+0xd1/0xe0 [1591689.799550] [] ? insert_kthread_work+0x40/0x40 [1591689.799554] [] ret_from_fork_nospec_begin+0x7/0x21 [1591689.799556] [] ? insert_kthread_work+0x40/0x40 [1591689.799556] Code: 00 8b 0d ad c6 c3 ff 55 48 89 e5 53 48 89 fb 83 e1 01 75 48 48 85 ff 75 0e 31 c0 90 5b 5d c3 0f 1f 44 00 00 48 89 d3 48 8b 53 08 <48> 85 d2 75 f4 85 c9 48 89 d8 74 e5 f6 05 7a c6 c3 ff 01 74 dc [1591689.818888] NMI watchdog: BUG: soft lockup - CPU#11 stuck for 22s! [ldlm_bl_12:147599] [1591689.820228] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1591689.820255] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1591689.820284] CPU: 11 PID: 147599 Comm: ldlm_bl_12 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1591689.820285] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1591689.820287] task: ffff9daf17ee5140 ti: ffff9da2212a8000 task.ti: ffff9da2212a8000 [1591689.820288] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1591689.820296] RSP: 0018:ffff9da2212abc70 EFLAGS: 00000246 [1591689.820297] RAX: 0000000000000000 RBX: ffff9daf18fe21c0 RCX: 0000000000590000 [1591689.820298] RDX: ffff9dbf3df5b780 RSI: 0000000000890001 RDI: ffff9daf0aa24058 [1591689.820299] RBP: ffff9da2212abc70 R08: ffff9daf3e8db780 R09: 0000000000000000 [1591689.820300] R10: 0000000000000000 R11: fffff902d678e580 R12: ffff9da2212abc38 [1591689.820301] R13: ffff9db8812ed680 R14: ffff9dbbc1e34fc8 R15: ffff9db8812ed718 [1591689.820302] FS: 0000000000000000(0000) GS:ffff9daf3e8c0000(0000) knlGS:0000000000000000 [1591689.820303] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1591689.820304] CR2: 00007fc899adebe0 CR3: 000000153ac10000 CR4: 00000000003607e0 [1591689.820305] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1591689.820305] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1591689.820306] Call Trace: [1591689.820310] [] queued_spin_lock_slowpath+0xb/0xf [1591689.820315] [] _raw_spin_lock+0x20/0x30 [1591689.820341] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1591689.820351] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1591689.820374] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1591689.820387] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1591689.820401] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1591689.820414] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1591689.820428] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1591689.820430] [] ? wake_up_state+0x20/0x20 [1591689.820444] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1591689.820445] [] kthread+0xd1/0xe0 [1591689.820447] [] ? insert_kthread_work+0x40/0x40 [1591689.820450] [] ret_from_fork_nospec_begin+0x7/0x21 [1591689.820452] [] ? insert_kthread_work+0x40/0x40 [1591689.820452] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1591689.882889] NMI watchdog: BUG: soft lockup - CPU#12 stuck for 22s! [ldlm_bl_01:146711] [1591689.884314] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1591689.884353] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1591689.884393] CPU: 12 PID: 146711 Comm: ldlm_bl_01 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1591689.884394] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1591689.884396] task: ffff9db43a7e1040 ti: ffff9daed9efc000 task.ti: ffff9daed9efc000 [1591689.884397] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1591689.884405] RSP: 0018:ffff9daed9effc70 EFLAGS: 00000246 [1591689.884406] RAX: 0000000000000000 RBX: ffff9da92734ea80 RCX: 0000000000610000 [1591689.884407] RDX: ffff9dbf3df1b780 RSI: 0000000000810001 RDI: ffff9daf0aa24058 [1591689.884408] RBP: ffff9daed9effc70 R08: ffff9dbf3de1b780 R09: 0000000000000000 [1591689.884409] R10: 0000000000000000 R11: fffff90327f58780 R12: ffff9daed9effc38 [1591689.884410] R13: ffff9da603386c20 R14: ffff9da83b38af38 R15: ffff9da603386cb8 [1591689.884412] FS: 0000000000000000(0000) GS:ffff9dbf3de00000(0000) knlGS:0000000000000000 [1591689.884413] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1591689.884414] CR2: 00007ff98e693003 CR3: 000000153ac10000 CR4: 00000000003607e0 [1591689.884415] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1591689.884416] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1591689.884417] Call Trace: [1591689.884423] [] queued_spin_lock_slowpath+0xb/0xf [1591689.884427] [] _raw_spin_lock+0x20/0x30 [1591689.884458] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1591689.884468] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1591689.884490] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1591689.884505] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1591689.884520] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1591689.884534] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1591689.884549] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1591689.884552] [] ? wake_up_state+0x20/0x20 [1591689.884566] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1591689.884568] [] kthread+0xd1/0xe0 [1591689.884570] [] ? insert_kthread_work+0x40/0x40 [1591689.884573] [] ret_from_fork_nospec_begin+0x7/0x21 [1591689.884575] [] ? insert_kthread_work+0x40/0x40 [1591689.884576] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1591689.892888] NMI watchdog: BUG: soft lockup - CPU#16 stuck for 22s! [ldlm_bl_02:146712] [1591689.894521] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1591689.894552] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1591689.894583] CPU: 16 PID: 146712 Comm: ldlm_bl_02 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1591689.894585] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1591689.894586] task: ffff9db43a7e0000 ti: ffff9da9ab244000 task.ti: ffff9da9ab244000 [1591689.894588] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x156/0x200 [1591689.894593] RSP: 0018:ffff9da9ab247c70 EFLAGS: 00000202 [1591689.894594] RAX: 0000000000000001 RBX: ffff9da92734c000 RCX: 0000000000810000 [1591689.894596] RDX: 0000000000990001 RSI: 0000000000590001 RDI: ffff9daf0aa24058 [1591689.894597] RBP: ffff9da9ab247c70 R08: ffff9dbf3df1b780 R09: ffff9dbf3e0db780 [1591689.894598] R10: 0000000000000000 R11: fffff90324595b00 R12: ffff9da9ab247c38 [1591689.894599] R13: ffff9db83ea815a0 R14: ffff9db9597bd838 R15: ffff9db83ea81638 [1591689.894601] FS: 0000000000000000(0000) GS:ffff9dbf3df00000(0000) knlGS:0000000000000000 [1591689.894602] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1591689.894603] CR2: 00007f657a5d29e0 CR3: 000000153ac10000 CR4: 00000000003607e0 [1591689.894605] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1591689.894606] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1591689.894607] Call Trace: [1591689.894610] [] queued_spin_lock_slowpath+0xb/0xf [1591689.894614] [] _raw_spin_lock+0x20/0x30 [1591689.894634] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1591689.894642] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1591689.894660] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1591689.894674] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1591689.894689] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1591689.894704] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1591689.894720] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1591689.894723] [] ? wake_up_state+0x20/0x20 [1591689.894738] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1591689.894740] [] kthread+0xd1/0xe0 [1591689.894742] [] ? insert_kthread_work+0x40/0x40 [1591689.894745] [] ret_from_fork_nospec_begin+0x7/0x21 [1591689.894747] [] ? insert_kthread_work+0x40/0x40 [1591689.894748] Code: 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 85 c0 74 21 83 f8 03 75 10 eb 1a 66 2e 0f 1f 84 00 00 00 00 00 85 c0 74 0c f3 90 <8b> 17 0f b7 c2 83 f8 03 75 f0 be 01 00 00 00 eb 15 66 0f 1f 84 [1591693.909908] NMI watchdog: BUG: soft lockup - CPU#23 stuck for 22s! [ldlm_bl_21:147626] [1591693.911295] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1591693.911333] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1591693.911375] CPU: 23 PID: 147626 Comm: ldlm_bl_21 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1591693.911377] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1591693.911378] task: ffff9daf3d37a080 ti: ffff9da918c78000 task.ti: ffff9da918c78000 [1591693.911380] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1591693.911388] RSP: 0018:ffff9da918c7bc70 EFLAGS: 00000246 [1591693.911390] RAX: 0000000000000000 RBX: ffff9da92734cf00 RCX: 0000000000b90000 [1591693.911391] RDX: ffff9daf3e8db780 RSI: 0000000000910001 RDI: ffff9daf0aa24058 [1591693.911392] RBP: ffff9da918c7bc70 R08: ffff9dbf3e0db780 R09: 0000000000000000 [1591693.911393] R10: 0000000000000000 R11: fffff9033c609c80 R12: ffff9da918c7bc38 [1591693.911394] R13: ffff9dbd45e54bb0 R14: ffff9dbf38564f38 R15: ffff9dbd45e54c48 [1591693.911395] FS: 0000000000000000(0000) GS:ffff9dbf3e0c0000(0000) knlGS:0000000000000000 [1591693.911396] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1591693.911398] CR2: 00007fdbf7fff9e0 CR3: 000000153ac10000 CR4: 00000000003607e0 [1591693.911399] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1591693.911400] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1591693.911401] Call Trace: [1591693.911406] [] queued_spin_lock_slowpath+0xb/0xf [1591693.911411] [] _raw_spin_lock+0x20/0x30 [1591693.911441] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1591693.911451] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1591693.911475] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1591693.911489] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1591693.911504] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1591693.911519] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1591693.911533] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1591693.911537] [] ? wake_up_state+0x20/0x20 [1591693.911551] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1591693.911553] [] kthread+0xd1/0xe0 [1591693.911555] [] ? insert_kthread_work+0x40/0x40 [1591693.911558] [] ret_from_fork_nospec_begin+0x7/0x21 [1591693.911559] [] ? insert_kthread_work+0x40/0x40 [1591693.911560] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1591697.897972] NMI watchdog: BUG: soft lockup - CPU#18 stuck for 23s! [ldlm_bl_06:147592] [1591697.899628] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1591697.899667] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1591697.899709] CPU: 18 PID: 147592 Comm: ldlm_bl_06 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1591697.899711] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1591697.899713] task: ffff9daf17f3a080 ti: ffff9da93cbd0000 task.ti: ffff9da93cbd0000 [1591697.899714] RIP: 0010:[] [] interval_last+0xf/0x120 [ptlrpc] [1591697.899749] RSP: 0018:ffff9da93cbd3c28 EFLAGS: 00000282 [1591697.899751] RAX: 0000000000000001 RBX: ffff9da9937260a0 RCX: 00000000130e0588 [1591697.899752] RDX: 00000000fe675000 RSI: 00000001719d5000 RDI: ffff9da33f2eff80 [1591697.899753] RBP: ffff9da93cbd3c28 R08: 0000000000000000 R09: ffff9dbf3e0db780 [1591697.899754] R10: 0000000000000000 R11: fffff902ea6fe000 R12: ffff9dacdee3abf0 [1591697.899755] R13: ffff9dacdee3abd8 R14: ffff9da9937260b8 R15: 0000000006784954 [1591697.899757] FS: 0000000000000000(0000) GS:ffff9dbf3df80000(0000) knlGS:0000000000000000 [1591697.899758] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1591697.899759] CR2: 00007f8d12f32000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1591697.899761] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1591697.899762] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1591697.899763] Call Trace: [1591697.899782] [] interval_iterate_reverse+0x7d/0x270 [ptlrpc] [1591697.899797] [] ldlm_extent_shift_kms+0xa2/0x1b0 [ptlrpc] [1591697.899801] [] ? remove_waiter+0x66/0x126 [1591697.899811] [] osc_ldlm_blocking_ast+0x306/0x3a0 [osc] [1591697.899825] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1591697.899848] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1591697.899862] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1591697.899878] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1591697.899893] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1591697.899897] [] ? wake_up_state+0x20/0x20 [1591697.899912] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1591697.899915] [] kthread+0xd1/0xe0 [1591697.899917] [] ? insert_kthread_work+0x40/0x40 [1591697.899921] [] ret_from_fork_nospec_begin+0x7/0x21 [1591697.899923] [] ? insert_kthread_work+0x40/0x40 [1591697.899924] Code: 00 00 00 e8 34 cf c1 ff 5b 41 5c 5d c3 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 8b 0d ad c6 c3 ff 55 48 89 e5 <53> 48 89 fb 83 e1 01 75 48 48 85 ff 75 0e 31 c0 90 5b 5d c3 0f [1591701.900336] Lustre: ai400-OST0007-osc-ffff9dbf39146800: Connection restored to 10.0.10.178@o2ib10 (at 10.0.10.178@o2ib10) [1591702.473841] Lustre: ai400-OST0004-osc-ffff9dbf39146800: Connection restored to 10.0.10.177@o2ib10 (at 10.0.10.177@o2ib10) [1592469.811985] NMI watchdog: BUG: soft lockup - CPU#5 stuck for 23s! [ldlm_bl_18:147615] [1592469.813607] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592469.813648] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592469.813689] CPU: 5 PID: 147615 Comm: ldlm_bl_18 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592469.813691] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592469.813693] task: ffff9daf3d329040 ti: ffff9da740348000 task.ti: ffff9da740348000 [1592469.813695] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1592469.813705] RSP: 0018:ffff9da74034bc70 EFLAGS: 00000246 [1592469.813706] RAX: 0000000000000000 RBX: ffff9da199516e40 RCX: 0000000000290000 [1592469.813707] RDX: ffff9dbf3e05b780 RSI: 0000000000a90001 RDI: ffff9da7713d6058 [1592469.813708] RBP: ffff9da74034bc70 R08: ffff9daf3e75b780 R09: 0000000000000000 [1592469.813709] R10: 0000000000000000 R11: fffff902e60bb100 R12: ffff9da74034bc38 [1592469.813711] R13: ffff9da8b1f18ad0 R14: ffff9dac0d5b2ea8 R15: ffff9da8b1f18b68 [1592469.813712] FS: 0000000000000000(0000) GS:ffff9daf3e740000(0000) knlGS:0000000000000000 [1592469.813713] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592469.813715] CR2: 0000000001015000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1592469.813716] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592469.813718] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592469.813719] Call Trace: [1592469.813726] [] queued_spin_lock_slowpath+0xb/0xf [1592469.813732] [] _raw_spin_lock+0x20/0x30 [1592469.813771] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592469.813783] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592469.813811] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592469.813825] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592469.813842] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592469.813858] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592469.813874] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592469.813877] [] ? wake_up_state+0x20/0x20 [1592469.813893] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592469.813896] [] kthread+0xd1/0xe0 [1592469.813898] [] ? insert_kthread_work+0x40/0x40 [1592469.813901] [] ret_from_fork_nospec_begin+0x7/0x21 [1592469.813903] [] ? insert_kthread_work+0x40/0x40 [1592469.813904] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1592469.916985] NMI watchdog: BUG: soft lockup - CPU#23 stuck for 23s! [ldlm_bl_09:147595] [1592469.918617] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592469.918658] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592469.918701] CPU: 23 PID: 147595 Comm: ldlm_bl_09 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592469.918702] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592469.918704] task: ffff9daf17f3e180 ti: ffff9da94f774000 task.ti: ffff9da94f774000 [1592469.918706] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1592469.918714] RSP: 0018:ffff9da94f777c70 EFLAGS: 00000246 [1592469.918715] RAX: 0000000000000000 RBX: ffff9daa5b2c6940 RCX: 0000000000b90000 [1592469.918717] RDX: ffff9daf3e61b780 RSI: 0000000000010001 RDI: ffff9da7713d6058 [1592469.918718] RBP: ffff9da94f777c70 R08: ffff9dbf3e0db780 R09: 0000000000000000 [1592469.918719] R10: 0000000000000000 R11: fffff902ead8a300 R12: ffff9da94f777c38 [1592469.918720] R13: ffff9db775236150 R14: ffff9db4107fa908 R15: ffff9db7752361e8 [1592469.918722] FS: 0000000000000000(0000) GS:ffff9dbf3e0c0000(0000) knlGS:0000000000000000 [1592469.918723] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592469.918724] CR2: 0000555650e4c082 CR3: 0000001ff9610000 CR4: 00000000003607e0 [1592469.918726] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592469.918727] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592469.918728] Call Trace: [1592469.918734] [] queued_spin_lock_slowpath+0xb/0xf [1592469.918739] [] _raw_spin_lock+0x20/0x30 [1592469.918771] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592469.918782] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592469.918809] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592469.918824] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592469.918840] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592469.918855] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592469.918871] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592469.918875] [] ? wake_up_state+0x20/0x20 [1592469.918890] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592469.918894] [] kthread+0xd1/0xe0 [1592469.918897] [] ? finish_task_switch+0x57/0x1c0 [1592469.918899] [] ? insert_kthread_work+0x40/0x40 [1592469.918902] [] ret_from_fork_nospec_begin+0x7/0x21 [1592469.918904] [] ? insert_kthread_work+0x40/0x40 [1592469.918905] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1592473.900012] NMI watchdog: BUG: soft lockup - CPU#16 stuck for 22s! [ldlm_bl_14:147606] [1592473.901351] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592473.901390] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592473.901430] CPU: 16 PID: 147606 Comm: ldlm_bl_14 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592473.901432] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592473.901434] task: ffff9d9f36a41040 ti: ffff9da1fa408000 task.ti: ffff9da1fa408000 [1592473.901435] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x120/0x200 [1592473.901444] RSP: 0018:ffff9da1fa40bc70 EFLAGS: 00000246 [1592473.901445] RAX: 0000000000000000 RBX: ffff9da1995175c0 RCX: 0000000000810000 [1592473.901446] RDX: ffff9daf3e85b780 RSI: 0000000000490001 RDI: ffff9da7713d6058 [1592473.901447] RBP: ffff9da1fa40bc70 R08: ffff9dbf3df1b780 R09: 0000000000000000 [1592473.901448] R10: 0000000000000000 R11: fffff902e9a0fc80 R12: ffff9da1fa40bc38 [1592473.901449] R13: ffff9db9f1b50000 R14: ffff9dbc62b9d178 R15: ffff9db9f1b50098 [1592473.901450] FS: 0000000000000000(0000) GS:ffff9dbf3df00000(0000) knlGS:0000000000000000 [1592473.901452] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592473.901453] CR2: 00007f8d12f32000 CR3: 0000001fd9a26000 CR4: 00000000003607e0 [1592473.901454] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592473.901455] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592473.901456] Call Trace: [1592473.901462] [] queued_spin_lock_slowpath+0xb/0xf [1592473.901466] [] _raw_spin_lock+0x20/0x30 [1592473.901497] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592473.901508] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592473.901532] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592473.901546] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592473.901562] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592473.901578] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592473.901594] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592473.901597] [] ? wake_up_state+0x20/0x20 [1592473.901613] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592473.901615] [] kthread+0xd1/0xe0 [1592473.901617] [] ? insert_kthread_work+0x40/0x40 [1592473.901621] [] ret_from_fork_nospec_begin+0x7/0x21 [1592473.901622] [] ? insert_kthread_work+0x40/0x40 [1592473.901623] Code: c1 e8 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 90 41 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 [1592473.905011] NMI watchdog: BUG: soft lockup - CPU#18 stuck for 22s! [ldlm_bl_08:147594] [1592473.906230] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592473.906257] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592473.906283] CPU: 18 PID: 147594 Comm: ldlm_bl_08 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592473.906284] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592473.906286] task: ffff9daf17f3c100 ti: ffff9daa70b7c000 task.ti: ffff9daa70b7c000 [1592473.906287] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1592473.906292] RSP: 0018:ffff9daa70b7fc70 EFLAGS: 00000246 [1592473.906293] RAX: 0000000000000000 RBX: ffff9daf36ba4140 RCX: 0000000000910000 [1592473.906294] RDX: ffff9dbf3e05b780 RSI: 0000000000a90001 RDI: ffff9da7713d6058 [1592473.906295] RBP: ffff9daa70b7fc70 R08: ffff9dbf3df9b780 R09: 0000000000000000 [1592473.906296] R10: 0000000000000000 R11: fffff902f79bf880 R12: ffff9daa70b7fc38 [1592473.906297] R13: ffff9db91a33c0e0 R14: ffff9db8d7783058 R15: ffff9db91a33c178 [1592473.906299] FS: 0000000000000000(0000) GS:ffff9dbf3df80000(0000) knlGS:0000000000000000 [1592473.906300] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592473.906301] CR2: 0000555650b9b0c0 CR3: 0000001901220000 CR4: 00000000003607e0 [1592473.906302] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592473.906303] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592473.906304] Call Trace: [1592473.906307] [] queued_spin_lock_slowpath+0xb/0xf [1592473.906310] [] _raw_spin_lock+0x20/0x30 [1592473.906327] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592473.906335] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592473.906349] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592473.906363] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592473.906378] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592473.906393] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592473.906408] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592473.906411] [] ? wake_up_state+0x20/0x20 [1592473.906426] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592473.906428] [] kthread+0xd1/0xe0 [1592473.906430] [] ? insert_kthread_work+0x40/0x40 [1592473.906433] [] ret_from_fork_nospec_begin+0x7/0x21 [1592473.906435] [] ? insert_kthread_work+0x40/0x40 [1592473.906436] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1592473.910013] NMI watchdog: BUG: soft lockup - CPU#20 stuck for 22s! [ldlm_bl_01:146711] [1592473.911490] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592473.911518] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592473.911545] CPU: 20 PID: 146711 Comm: ldlm_bl_01 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592473.911546] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592473.911548] task: ffff9db43a7e1040 ti: ffff9daed9efc000 task.ti: ffff9daed9efc000 [1592473.911550] RIP: 0010:[] [] interval_last+0x2f/0x120 [ptlrpc] [1592473.911571] RSP: 0018:ffff9daed9effc20 EFLAGS: 00000282 [1592473.911572] RAX: 0000000000000001 RBX: ffff9db9fe33cae0 RCX: 0000000000000000 [1592473.911573] RDX: 0000000000000000 RSI: 0000000171f0d000 RDI: ffff9da727944f80 [1592473.911575] RBP: ffff9daed9effc28 R08: 0000000000000000 R09: ffff9daf3e7db780 [1592473.911576] R10: 0000000000000000 R11: fffff902fc676980 R12: ffff9db9f1b536c0 [1592473.911577] R13: ffff9db9f1b536a8 R14: ffff9db9fe33caf8 R15: 000000004f8e9643 [1592473.911578] FS: 0000000000000000(0000) GS:ffff9dbf3e000000(0000) knlGS:0000000000000000 [1592473.911580] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592473.911581] CR2: 00007f8d12f32000 CR3: 0000001f30b18000 CR4: 00000000003607e0 [1592473.911582] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592473.911583] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592473.911584] Call Trace: [1592473.911602] [] interval_iterate_reverse+0x7d/0x270 [ptlrpc] [1592473.911617] [] ldlm_extent_shift_kms+0xa2/0x1b0 [ptlrpc] [1592473.911620] [] ? remove_waiter+0x66/0x126 [1592473.911627] [] osc_ldlm_blocking_ast+0x306/0x3a0 [osc] [1592473.911640] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592473.911655] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592473.911670] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592473.911685] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592473.911700] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592473.911703] [] ? wake_up_state+0x20/0x20 [1592473.911718] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592473.911720] [] kthread+0xd1/0xe0 [1592473.911722] [] ? insert_kthread_work+0x40/0x40 [1592473.911725] [] ret_from_fork_nospec_begin+0x7/0x21 [1592473.911727] [] ? insert_kthread_work+0x40/0x40 [1592473.911728] Code: 00 8b 0d ad c6 c3 ff 55 48 89 e5 53 48 89 fb 83 e1 01 75 48 48 85 ff 75 0e 31 c0 90 5b 5d c3 0f 1f 44 00 00 48 89 d3 48 8b 53 08 <48> 85 d2 75 f4 85 c9 48 89 d8 74 e5 f6 05 7a c6 c3 ff 01 74 dc [1592497.812173] NMI watchdog: BUG: soft lockup - CPU#5 stuck for 23s! [ldlm_bl_18:147615] [1592497.813434] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592497.813472] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592497.813511] CPU: 5 PID: 147615 Comm: ldlm_bl_18 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592497.813513] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592497.813515] task: ffff9daf3d329040 ti: ffff9da740348000 task.ti: ffff9da740348000 [1592497.813516] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1592497.813525] RSP: 0018:ffff9da74034bc70 EFLAGS: 00000246 [1592497.813526] RAX: 0000000000000000 RBX: ffff9da892d01900 RCX: 0000000000290000 [1592497.813528] RDX: ffff9dbf3df5b780 RSI: 0000000000890001 RDI: ffff9da7713d6058 [1592497.813529] RBP: ffff9da74034bc70 R08: ffff9daf3e75b780 R09: 0000000000000000 [1592497.813530] R10: 0000000000000000 R11: fffff902d41e3480 R12: ffff9da74034bc38 [1592497.813531] R13: ffff9da8b1f18ad0 R14: ffff9dac0d5b2ea8 R15: ffff9da8b1f18b68 [1592497.813532] FS: 0000000000000000(0000) GS:ffff9daf3e740000(0000) knlGS:0000000000000000 [1592497.813533] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592497.813534] CR2: 0000000001015000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1592497.813535] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592497.813536] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592497.813537] Call Trace: [1592497.813544] [] queued_spin_lock_slowpath+0xb/0xf [1592497.813550] [] _raw_spin_lock+0x20/0x30 [1592497.813586] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592497.813597] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592497.813624] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592497.813627] [] ? native_queued_spin_lock_slowpath+0x1d0/0x200 [1592497.813642] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592497.813657] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592497.813672] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592497.813675] [] ? wake_up_state+0x20/0x20 [1592497.813690] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592497.813692] [] kthread+0xd1/0xe0 [1592497.813694] [] ? insert_kthread_work+0x40/0x40 [1592497.813697] [] ret_from_fork_nospec_begin+0x7/0x21 [1592497.813699] [] ? insert_kthread_work+0x40/0x40 [1592497.813700] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1592497.917173] NMI watchdog: BUG: soft lockup - CPU#23 stuck for 23s! [ldlm_bl_09:147595] [1592497.918443] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592497.918481] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592497.918522] CPU: 23 PID: 147595 Comm: ldlm_bl_09 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592497.918523] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592497.918525] task: ffff9daf17f3e180 ti: ffff9da94f774000 task.ti: ffff9da94f774000 [1592497.918526] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1592497.918534] RSP: 0018:ffff9da94f777c70 EFLAGS: 00000246 [1592497.918535] RAX: 0000000000000000 RBX: ffff9da199515b80 RCX: 0000000000b90000 [1592497.918536] RDX: ffff9dbf3df1b780 RSI: 0000000000810001 RDI: ffff9da7713d6058 [1592497.918537] RBP: ffff9da94f777c70 R08: ffff9dbf3e0db780 R09: 0000000000000000 [1592497.918538] R10: 0000000000000000 R11: fffff902f20cf800 R12: ffff9da94f777c38 [1592497.918539] R13: ffff9db775236150 R14: ffff9db4107fa908 R15: ffff9db7752361e8 [1592497.918541] FS: 0000000000000000(0000) GS:ffff9dbf3e0c0000(0000) knlGS:0000000000000000 [1592497.918542] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592497.918543] CR2: 0000555650e4c082 CR3: 0000001ff9610000 CR4: 00000000003607e0 [1592497.918544] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592497.918545] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592497.918546] Call Trace: [1592497.918552] [] queued_spin_lock_slowpath+0xb/0xf [1592497.918556] [] _raw_spin_lock+0x20/0x30 [1592497.918588] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592497.918599] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592497.918623] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592497.918636] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592497.918652] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592497.918666] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592497.918681] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592497.918684] [] ? wake_up_state+0x20/0x20 [1592497.918698] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592497.918701] [] kthread+0xd1/0xe0 [1592497.918704] [] ? finish_task_switch+0x57/0x1c0 [1592497.918705] [] ? insert_kthread_work+0x40/0x40 [1592497.918708] [] ret_from_fork_nospec_begin+0x7/0x21 [1592497.918710] [] ? insert_kthread_work+0x40/0x40 [1592497.918711] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1592501.895201] NMI watchdog: BUG: soft lockup - CPU#14 stuck for 22s! [ldlm_bl_24:147752] [1592501.896733] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592501.896775] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592501.896817] CPU: 14 PID: 147752 Comm: ldlm_bl_24 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592501.896819] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592501.896821] task: ffff9da125ac30c0 ti: ffff9dae63ab4000 task.ti: ffff9dae63ab4000 [1592501.896822] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1592501.896831] RSP: 0018:ffff9dae63ab7c70 EFLAGS: 00000246 [1592501.896832] RAX: 0000000000000000 RBX: ffff9daf36ba48c0 RCX: 0000000000710000 [1592501.896833] RDX: ffff9daf3e81b780 RSI: 0000000000410001 RDI: ffff9da7713d6058 [1592501.896835] RBP: ffff9dae63ab7c70 R08: ffff9dbf3de9b780 R09: 0000000000000000 [1592501.896836] R10: 0000000000000000 R11: fffff902d371d200 R12: ffff9dae63ab7c38 [1592501.896837] R13: ffff9dbc62a02b40 R14: ffff9dbf3f3a7568 R15: ffff9dbc62a02bd8 [1592501.896838] FS: 0000000000000000(0000) GS:ffff9dbf3de80000(0000) knlGS:0000000000000000 [1592501.896840] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592501.896841] CR2: 00007fadefc7ffb8 CR3: 000000153ac10000 CR4: 00000000003607e0 [1592501.896842] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592501.896843] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592501.896844] Call Trace: [1592501.896850] [] queued_spin_lock_slowpath+0xb/0xf [1592501.896855] [] _raw_spin_lock+0x20/0x30 [1592501.896886] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592501.896897] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592501.896920] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592501.896935] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592501.896950] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592501.896966] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592501.896982] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592501.896985] [] ? wake_up_state+0x20/0x20 [1592501.897000] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592501.897003] [] kthread+0xd1/0xe0 [1592501.897005] [] ? insert_kthread_work+0x40/0x40 [1592501.897008] [] ret_from_fork_nospec_begin+0x7/0x21 [1592501.897010] [] ? insert_kthread_work+0x40/0x40 [1592501.897011] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1592501.900199] NMI watchdog: BUG: soft lockup - CPU#16 stuck for 22s! [ldlm_bl_14:147606] [1592501.901589] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592501.901616] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592501.901643] CPU: 16 PID: 147606 Comm: ldlm_bl_14 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592501.901645] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592501.901646] task: ffff9d9f36a41040 ti: ffff9da1fa408000 task.ti: ffff9da1fa408000 [1592501.901648] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1592501.901652] RSP: 0018:ffff9da1fa40bc70 EFLAGS: 00000246 [1592501.901653] RAX: 0000000000000000 RBX: ffff9daa5b2c6f80 RCX: 0000000000810000 [1592501.901654] RDX: ffff9daf3e71b780 RSI: 0000000000210001 RDI: ffff9da7713d6058 [1592501.901656] RBP: ffff9da1fa40bc70 R08: ffff9dbf3df1b780 R09: 0000000000000000 [1592501.901657] R10: 0000000000000000 R11: fffff902f77b8480 R12: ffff9da1fa40bc38 [1592501.901658] R13: ffff9db9f1b50000 R14: ffff9dbc62b9d178 R15: ffff9db9f1b50098 [1592501.901659] FS: 0000000000000000(0000) GS:ffff9dbf3df00000(0000) knlGS:0000000000000000 [1592501.901661] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592501.901662] CR2: 00007f8d12f32000 CR3: 0000001fd9a26000 CR4: 00000000003607e0 [1592501.901663] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592501.901664] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592501.901665] Call Trace: [1592501.901668] [] queued_spin_lock_slowpath+0xb/0xf [1592501.901671] [] _raw_spin_lock+0x20/0x30 [1592501.901688] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592501.901696] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592501.901711] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592501.901725] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592501.901740] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592501.901755] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592501.901771] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592501.901773] [] ? wake_up_state+0x20/0x20 [1592501.901788] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592501.901790] [] kthread+0xd1/0xe0 [1592501.901792] [] ? insert_kthread_work+0x40/0x40 [1592501.901795] [] ret_from_fork_nospec_begin+0x7/0x21 [1592501.901797] [] ? insert_kthread_work+0x40/0x40 [1592501.901798] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1592501.905199] NMI watchdog: BUG: soft lockup - CPU#18 stuck for 22s! [ldlm_bl_08:147594] [1592501.906548] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592501.906576] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592501.906602] CPU: 18 PID: 147594 Comm: ldlm_bl_08 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592501.906604] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592501.906606] task: ffff9daf17f3c100 ti: ffff9daa70b7c000 task.ti: ffff9daa70b7c000 [1592501.906607] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1592501.906611] RSP: 0018:ffff9daa70b7fc70 EFLAGS: 00000246 [1592501.906612] RAX: 0000000000000000 RBX: ffff9da1995152c0 RCX: 0000000000910000 [1592501.906614] RDX: ffff9dbf3de5b780 RSI: 0000000000690001 RDI: ffff9da7713d6058 [1592501.906615] RBP: ffff9daa70b7fc70 R08: ffff9dbf3df9b780 R09: 0000000000000000 [1592501.906616] R10: 0000000000000000 R11: fffff90330607e80 R12: ffff9daa70b7fc38 [1592501.906617] R13: ffff9db91a33c0e0 R14: ffff9db8d7783058 R15: ffff9db91a33c178 [1592501.906619] FS: 0000000000000000(0000) GS:ffff9dbf3df80000(0000) knlGS:0000000000000000 [1592501.906620] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592501.906621] CR2: 0000555650b9b0c0 CR3: 0000001901220000 CR4: 00000000003607e0 [1592501.906622] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592501.906624] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592501.906624] Call Trace: [1592501.906627] [] queued_spin_lock_slowpath+0xb/0xf [1592501.906630] [] _raw_spin_lock+0x20/0x30 [1592501.906647] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592501.906654] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592501.906669] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592501.906683] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592501.906698] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592501.906713] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592501.906728] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592501.906731] [] ? wake_up_state+0x20/0x20 [1592501.906746] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592501.906748] [] kthread+0xd1/0xe0 [1592501.906750] [] ? insert_kthread_work+0x40/0x40 [1592501.906753] [] ret_from_fork_nospec_begin+0x7/0x21 [1592501.906755] [] ? insert_kthread_work+0x40/0x40 [1592501.906756] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1592501.910199] NMI watchdog: BUG: soft lockup - CPU#20 stuck for 22s! [ldlm_bl_01:146711] [1592501.911530] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592501.911558] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592501.911585] CPU: 20 PID: 146711 Comm: ldlm_bl_01 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592501.911586] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592501.911588] task: ffff9db43a7e1040 ti: ffff9daed9efc000 task.ti: ffff9daed9efc000 [1592501.911589] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1592501.911593] RSP: 0018:ffff9daed9effc70 EFLAGS: 00000246 [1592501.911594] RAX: 0000000000000000 RBX: ffff9da199517e80 RCX: 0000000000a10000 [1592501.911596] RDX: ffff9daf3e75b780 RSI: 0000000000290001 RDI: ffff9da7713d6058 [1592501.911597] RBP: ffff9daed9effc70 R08: ffff9dbf3e01b780 R09: 0000000000000000 [1592501.911598] R10: 0000000000000000 R11: fffff90325f8d800 R12: ffff9daed9effc38 [1592501.911599] R13: ffff9db9f1b53610 R14: ffff9dbc62b9d3b8 R15: ffff9db9f1b536a8 [1592501.911601] FS: 0000000000000000(0000) GS:ffff9dbf3e000000(0000) knlGS:0000000000000000 [1592501.911602] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592501.911603] CR2: 00007f8d12f32000 CR3: 0000001f30b18000 CR4: 00000000003607e0 [1592501.911604] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592501.911606] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592501.911607] Call Trace: [1592501.911609] [] queued_spin_lock_slowpath+0xb/0xf [1592501.911612] [] _raw_spin_lock+0x20/0x30 [1592501.911629] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592501.911636] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592501.911651] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592501.911665] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592501.911680] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592501.911695] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592501.911710] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592501.911713] [] ? wake_up_state+0x20/0x20 [1592501.911728] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592501.911730] [] kthread+0xd1/0xe0 [1592501.911732] [] ? insert_kthread_work+0x40/0x40 [1592501.911735] [] ret_from_fork_nospec_begin+0x7/0x21 [1592501.911737] [] ? insert_kthread_work+0x40/0x40 [1592501.911738] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1592508.619246] INFO: rcu_sched self-detected stall on CPU [1592508.619248] INFO: rcu_sched self-detected stall on CPU [1592508.619249] INFO: rcu_sched self-detected stall on CPU [1592508.619250] { [1592508.619254] { [1592508.619255] 5 [1592508.619256] 20 [1592508.619257] } [1592508.619264] (t=60000 jiffies g=20931404 c=20931403 q=133339) [1592508.619265] } (t=60000 jiffies g=20931404 c=20931403 q=133339) [1592508.619266] Task dump for CPU 5: [1592508.619269] ldlm_bl_18 R running task 0 147615 2 0x00000088 [1592508.619270] Call Trace: [1592508.619279] [] sched_show_task+0xa8/0x110 [1592508.619281] [] dump_cpu_task+0x39/0x70 [1592508.619285] [] rcu_dump_cpu_stacks+0x90/0xd0 [1592508.619287] [] rcu_check_callbacks+0x442/0x730 [1592508.619292] [] ? tick_sched_do_timer+0x50/0x50 [1592508.619296] [] update_process_times+0x46/0x80 [1592508.619298] [] tick_sched_handle+0x30/0x70 [1592508.619299] [] tick_sched_timer+0x39/0x80 [1592508.619304] [] __hrtimer_run_queues+0xf3/0x270 [1592508.619306] [] hrtimer_interrupt+0xaf/0x1d0 [1592508.619312] [] local_apic_timer_interrupt+0x3b/0x60 [1592508.619316] [] smp_apic_timer_interrupt+0x43/0x60 [1592508.619320] [] apic_timer_interrupt+0x162/0x170 [1592508.619324] [] ? native_queued_spin_lock_slowpath+0x126/0x200 [1592508.619328] [] queued_spin_lock_slowpath+0xb/0xf [1592508.619331] [] _raw_spin_lock+0x20/0x30 [1592508.619368] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592508.619380] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592508.619407] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592508.619421] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592508.619437] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592508.619453] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592508.619468] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592508.619470] [] ? wake_up_state+0x20/0x20 [1592508.619485] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592508.619487] [] kthread+0xd1/0xe0 [1592508.619488] [] ? insert_kthread_work+0x40/0x40 [1592508.619490] [] ret_from_fork_nospec_begin+0x7/0x21 [1592508.619492] [] ? insert_kthread_work+0x40/0x40 [1592508.619493] Task dump for CPU 16: [1592508.619493] Task dump for CPU 5: [1592508.619496] ldlm_bl_14 R [1592508.619496] ldlm_bl_18 R [1592508.619497] running task [1592508.619498] 0 147606 2 0x00000088 [1592508.619500] running task 0 147615 2 0x00000088 [1592508.619501] Call Trace: [1592508.619501] Call Trace: [1592508.619516] [] ? ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592508.619549] [] ? ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592508.619564] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592508.619579] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592508.619594] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592508.619610] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592508.619612] [] ? wake_up_state+0x20/0x20 [1592508.619616] [] ? wake_up_state+0x20/0x20 [1592508.619631] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592508.619646] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592508.619648] [] kthread+0xd1/0xe0 [1592508.619652] [] kthread+0xd1/0xe0 [1592508.619653] [] ? insert_kthread_work+0x40/0x40 [1592508.619655] [] ? insert_kthread_work+0x40/0x40 [1592508.619658] [] ret_from_fork_nospec_begin+0x7/0x21 [1592508.619663] [] ret_from_fork_nospec_begin+0x7/0x21 [1592508.619665] [] ? insert_kthread_work+0x40/0x40 [1592508.619666] [] ? insert_kthread_work+0x40/0x40 [1592508.619667] Task dump for CPU 18: [1592508.619669] ldlm_bl_08 R running task 0 147594 2 0x00000088 [1592508.619670] Call Trace: [1592508.619684] [] ? ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592508.619698] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592508.619713] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592508.619715] [] ? wake_up_state+0x20/0x20 [1592508.619729] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592508.619731] [] kthread+0xd1/0xe0 [1592508.619732] [] ? insert_kthread_work+0x40/0x40 [1592508.619734] [] ret_from_fork_nospec_begin+0x7/0x21 [1592508.619736] [] ? insert_kthread_work+0x40/0x40 [1592508.619736] Task dump for CPU 20: [1592508.619738] ldlm_bl_01 R running task 0 146711 2 0x00000088 [1592508.619738] Call Trace: [1592508.619752] [] ? ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592508.619767] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592508.619781] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592508.619783] [] ? wake_up_state+0x20/0x20 [1592508.619798] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592508.619799] [] kthread+0xd1/0xe0 [1592508.619801] [] ? insert_kthread_work+0x40/0x40 [1592508.619802] [] ret_from_fork_nospec_begin+0x7/0x21 [1592508.619804] [] ? insert_kthread_work+0x40/0x40 [1592508.619805] Task dump for CPU 23: [1592508.619806] ldlm_bl_09 R running task 0 147595 2 0x00000088 [1592508.619806] Call Trace: [1592508.619820] [] ? ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592508.619834] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592508.619849] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592508.619851] [] ? wake_up_state+0x20/0x20 [1592508.619866] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592508.619867] [] kthread+0xd1/0xe0 [1592508.619870] [] ? finish_task_switch+0x57/0x1c0 [1592508.619871] [] ? insert_kthread_work+0x40/0x40 [1592508.619873] [] ret_from_fork_nospec_begin+0x7/0x21 [1592508.619875] [] ? insert_kthread_work+0x40/0x40 [1592508.619876] Task dump for CPU 16: [1592508.619878] ldlm_bl_14 R running task 0 147606 2 0x00000088 [1592508.619878] Call Trace: [1592508.619893] [] ? ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592508.619907] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592508.619922] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592508.619923] [] ? wake_up_state+0x20/0x20 [1592508.619938] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592508.619939] [] kthread+0xd1/0xe0 [1592508.619941] [] ? insert_kthread_work+0x40/0x40 [1592508.619943] [] ret_from_fork_nospec_begin+0x7/0x21 [1592508.619944] [] ? insert_kthread_work+0x40/0x40 [1592508.619945] Task dump for CPU 18: [1592508.619947] ldlm_bl_08 R running task 0 147594 2 0x00000088 [1592508.619947] Call Trace: [1592508.619961] [] ? ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592508.619975] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592508.619990] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592508.619992] [] ? wake_up_state+0x20/0x20 [1592508.620006] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592508.620007] [] kthread+0xd1/0xe0 [1592508.620009] [] ? insert_kthread_work+0x40/0x40 [1592508.620011] [] ret_from_fork_nospec_begin+0x7/0x21 [1592508.620012] [] ? insert_kthread_work+0x40/0x40 [1592508.620013] Task dump for CPU 20: [1592508.620014] ldlm_bl_01 R running task 0 146711 2 0x00000088 [1592508.620014] Call Trace: [1592508.620016] [] sched_show_task+0xa8/0x110 [1592508.620018] [] dump_cpu_task+0x39/0x70 [1592508.620020] [] rcu_dump_cpu_stacks+0x90/0xd0 [1592508.620022] [] rcu_check_callbacks+0x442/0x730 [1592508.620024] [] ? tick_sched_do_timer+0x50/0x50 [1592508.620027] [] update_process_times+0x46/0x80 [1592508.620029] [] tick_sched_handle+0x30/0x70 [1592508.620030] [] tick_sched_timer+0x39/0x80 [1592508.620033] [] __hrtimer_run_queues+0xf3/0x270 [1592508.620035] [] hrtimer_interrupt+0xaf/0x1d0 [1592508.620039] [] local_apic_timer_interrupt+0x3b/0x60 [1592508.620041] [] smp_apic_timer_interrupt+0x43/0x60 [1592508.620043] [] apic_timer_interrupt+0x162/0x170 [1592508.620046] [] ? native_queued_spin_lock_slowpath+0x126/0x200 [1592508.620049] [] queued_spin_lock_slowpath+0xb/0xf [1592508.620053] [] _raw_spin_lock+0x20/0x30 [1592508.620084] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592508.620094] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592508.620107] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592508.620120] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592508.620134] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592508.620148] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592508.620163] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592508.620165] [] ? wake_up_state+0x20/0x20 [1592508.620179] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592508.620181] [] kthread+0xd1/0xe0 [1592508.620182] [] ? insert_kthread_work+0x40/0x40 [1592508.620184] [] ret_from_fork_nospec_begin+0x7/0x21 [1592508.620185] [] ? insert_kthread_work+0x40/0x40 [1592508.620186] Task dump for CPU 23: [1592508.620187] ldlm_bl_09 R running task 0 147595 2 0x00000088 [1592508.620188] Call Trace: [1592508.620202] [] ? ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592508.620216] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592508.620230] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592508.620232] [] ? wake_up_state+0x20/0x20 [1592508.620247] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592508.620248] [] kthread+0xd1/0xe0 [1592508.620251] [] ? finish_task_switch+0x57/0x1c0 [1592508.620252] [] ? insert_kthread_work+0x40/0x40 [1592508.620254] [] ret_from_fork_nospec_begin+0x7/0x21 [1592508.620256] [] ? insert_kthread_work+0x40/0x40 [1592508.622586] { 23} (t=60003 jiffies g=20931404 c=20931403 q=133339) [1592508.622593] Task dump for CPU 5: [1592508.622594] ldlm_bl_18 R running task 0 147615 2 0x00000088 [1592508.622597] Call Trace: [1592508.622614] [] ? ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592508.622629] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592508.622643] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592508.622646] [] ? wake_up_state+0x20/0x20 [1592508.622660] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592508.622661] [] kthread+0xd1/0xe0 [1592508.622663] [] ? insert_kthread_work+0x40/0x40 [1592508.622666] [] ret_from_fork_nospec_begin+0x7/0x21 [1592508.622668] [] ? insert_kthread_work+0x40/0x40 [1592508.622669] Task dump for CPU 16: [1592508.622670] ldlm_bl_14 R running task 0 147606 2 0x00000088 [1592508.622671] Call Trace: [1592508.622685] [] ? ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592508.622698] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592508.622712] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592508.622714] [] ? wake_up_state+0x20/0x20 [1592508.622727] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592508.622729] [] kthread+0xd1/0xe0 [1592508.622731] [] ? insert_kthread_work+0x40/0x40 [1592508.622733] [] ret_from_fork_nospec_begin+0x7/0x21 [1592508.622735] [] ? insert_kthread_work+0x40/0x40 [1592508.622736] Task dump for CPU 18: [1592508.622736] ldlm_bl_08 R running task 0 147594 2 0x00000088 [1592508.622738] Call Trace: [1592508.622751] [] ? ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592508.622764] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592508.622777] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592508.622779] [] ? wake_up_state+0x20/0x20 [1592508.622792] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592508.622794] [] kthread+0xd1/0xe0 [1592508.622796] [] ? insert_kthread_work+0x40/0x40 [1592508.622798] [] ret_from_fork_nospec_begin+0x7/0x21 [1592508.622799] [] ? insert_kthread_work+0x40/0x40 [1592508.622800] Task dump for CPU 20: [1592508.622801] ldlm_bl_01 R running task 0 146711 2 0x00000088 [1592508.622802] Call Trace: [1592508.622815] [] ? ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592508.622828] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592508.622842] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592508.622844] [] ? wake_up_state+0x20/0x20 [1592508.622856] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592508.622858] [] kthread+0xd1/0xe0 [1592508.622860] [] ? insert_kthread_work+0x40/0x40 [1592508.622862] [] ret_from_fork_nospec_begin+0x7/0x21 [1592508.622864] [] ? insert_kthread_work+0x40/0x40 [1592508.622865] Task dump for CPU 23: [1592508.622865] ldlm_bl_09 R running task 0 147595 2 0x00000088 [1592508.622867] Call Trace: [1592508.622868] [] sched_show_task+0xa8/0x110 [1592508.622871] [] dump_cpu_task+0x39/0x70 [1592508.622873] [] rcu_dump_cpu_stacks+0x90/0xd0 [1592508.622874] [] rcu_check_callbacks+0x442/0x730 [1592508.622877] [] ? tick_sched_do_timer+0x50/0x50 [1592508.622878] [] update_process_times+0x46/0x80 [1592508.622880] [] tick_sched_handle+0x30/0x70 [1592508.622882] [] tick_sched_timer+0x39/0x80 [1592508.622884] [] __hrtimer_run_queues+0xf3/0x270 [1592508.622886] [] hrtimer_interrupt+0xaf/0x1d0 [1592508.622889] [] local_apic_timer_interrupt+0x3b/0x60 [1592508.622891] [] smp_apic_timer_interrupt+0x43/0x60 [1592508.622893] [] apic_timer_interrupt+0x162/0x170 [1592508.622894] [] ? native_queued_spin_lock_slowpath+0x126/0x200 [1592508.622898] [] queued_spin_lock_slowpath+0xb/0xf [1592508.622901] [] _raw_spin_lock+0x20/0x30 [1592508.622916] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592508.622924] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592508.622936] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592508.622950] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592508.622963] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592508.622977] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592508.622991] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592508.622993] [] ? wake_up_state+0x20/0x20 [1592508.623006] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592508.623008] [] kthread+0xd1/0xe0 [1592508.623011] [] ? finish_task_switch+0x57/0x1c0 [1592508.623012] [] ? insert_kthread_work+0x40/0x40 [1592508.623015] [] ret_from_fork_nospec_begin+0x7/0x21 [1592508.623016] [] ? insert_kthread_work+0x40/0x40 [1592529.890389] NMI watchdog: BUG: soft lockup - CPU#12 stuck for 23s! [ldlm_bl_02:146712] [1592529.891564] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592529.891603] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592529.891644] CPU: 12 PID: 146712 Comm: ldlm_bl_02 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592529.891645] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592529.891647] task: ffff9db43a7e0000 ti: ffff9da9ab244000 task.ti: ffff9da9ab244000 [1592529.891649] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1592529.891657] RSP: 0018:ffff9da9ab247c70 EFLAGS: 00000246 [1592529.891658] RAX: 0000000000000000 RBX: ffff9daa5b2c4140 RCX: 0000000000610000 [1592529.891659] RDX: ffff9dbf3e0db780 RSI: 0000000000b90001 RDI: ffff9da7713d6058 [1592529.891660] RBP: ffff9da9ab247c70 R08: ffff9dbf3de1b780 R09: 0000000000000000 [1592529.891661] R10: 0000000000000000 R11: fffff902f896b280 R12: ffff9da9ab247c38 [1592529.891662] R13: ffff9db9f32a95a0 R14: ffff9db8ca208bd8 R15: ffff9db9f32a9638 [1592529.891663] FS: 0000000000000000(0000) GS:ffff9dbf3de00000(0000) knlGS:0000000000000000 [1592529.891664] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592529.891665] CR2: 000000c000c025a0 CR3: 000000153ac10000 CR4: 00000000003607e0 [1592529.891667] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592529.891668] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592529.891669] Call Trace: [1592529.891674] [] queued_spin_lock_slowpath+0xb/0xf [1592529.891679] [] _raw_spin_lock+0x20/0x30 [1592529.891710] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592529.891720] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592529.891746] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592529.891760] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592529.891775] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592529.891790] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592529.891805] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592529.891808] [] ? wake_up_state+0x20/0x20 [1592529.891822] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592529.891824] [] kthread+0xd1/0xe0 [1592529.891826] [] ? insert_kthread_work+0x40/0x40 [1592529.891829] [] ret_from_fork_nospec_begin+0x7/0x21 [1592529.891831] [] ? insert_kthread_work+0x40/0x40 [1592529.891832] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1592529.900388] NMI watchdog: BUG: soft lockup - CPU#16 stuck for 23s! [ldlm_bl_14:147606] [1592529.901643] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592529.901673] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592529.901705] CPU: 16 PID: 147606 Comm: ldlm_bl_14 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592529.901706] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592529.901708] task: ffff9d9f36a41040 ti: ffff9da1fa408000 task.ti: ffff9da1fa408000 [1592529.901709] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x120/0x200 [1592529.901714] RSP: 0018:ffff9da1fa40bc70 EFLAGS: 00000246 [1592529.901715] RAX: 0000000000000000 RBX: ffff9daf36ba6e40 RCX: 0000000000810000 [1592529.901717] RDX: ffff9daf3e61b780 RSI: 0000000000010001 RDI: ffff9da7713d6058 [1592529.901718] RBP: ffff9da1fa40bc70 R08: ffff9dbf3df1b780 R09: 0000000000000000 [1592529.901719] R10: 0000000000000000 R11: fffff90300f05780 R12: ffff9da1fa40bc38 [1592529.901720] R13: ffff9db9f1b50000 R14: ffff9dbc62b9d178 R15: ffff9db9f1b50098 [1592529.901722] FS: 0000000000000000(0000) GS:ffff9dbf3df00000(0000) knlGS:0000000000000000 [1592529.901723] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592529.901724] CR2: 00007f8d12f32000 CR3: 0000001fd9a26000 CR4: 00000000003607e0 [1592529.901725] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592529.901727] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592529.901727] Call Trace: [1592529.901731] [] queued_spin_lock_slowpath+0xb/0xf [1592529.901734] [] _raw_spin_lock+0x20/0x30 [1592529.901756] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592529.901765] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592529.901784] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592529.901798] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592529.901813] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592529.901828] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592529.901843] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592529.901846] [] ? wake_up_state+0x20/0x20 [1592529.901861] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592529.901864] [] kthread+0xd1/0xe0 [1592529.901866] [] ? insert_kthread_work+0x40/0x40 [1592529.901868] [] ret_from_fork_nospec_begin+0x7/0x21 [1592529.901870] [] ? insert_kthread_work+0x40/0x40 [1592529.901871] Code: c1 e8 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 90 41 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 [1592529.905389] NMI watchdog: BUG: soft lockup - CPU#18 stuck for 23s! [ldlm_bl_08:147594] [1592529.906374] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592529.906401] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592529.906427] CPU: 18 PID: 147594 Comm: ldlm_bl_08 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592529.906428] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592529.906430] task: ffff9daf17f3c100 ti: ffff9daa70b7c000 task.ti: ffff9daa70b7c000 [1592529.906431] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1592529.906435] RSP: 0018:ffff9daa70b7fc70 EFLAGS: 00000246 [1592529.906436] RAX: 0000000000000000 RBX: ffff9daa5b2c6080 RCX: 0000000000910000 [1592529.906437] RDX: ffff9dbf3dedb780 RSI: 0000000000790001 RDI: ffff9da7713d6058 [1592529.906438] RBP: ffff9daa70b7fc70 R08: ffff9dbf3df9b780 R09: 0000000000000000 [1592529.906439] R10: 0000000000000000 R11: fffff902deb4c400 R12: ffff9daa70b7fc38 [1592529.906440] R13: ffff9db91a33c0e0 R14: ffff9db8d7783058 R15: ffff9db91a33c178 [1592529.906441] FS: 0000000000000000(0000) GS:ffff9dbf3df80000(0000) knlGS:0000000000000000 [1592529.906443] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592529.906444] CR2: 0000555650b9b0c0 CR3: 0000001901220000 CR4: 00000000003607e0 [1592529.906445] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592529.906446] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592529.906446] Call Trace: [1592529.906449] [] queued_spin_lock_slowpath+0xb/0xf [1592529.906452] [] _raw_spin_lock+0x20/0x30 [1592529.906469] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592529.906477] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592529.906493] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592529.906507] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592529.906521] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592529.906535] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592529.906549] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592529.906552] [] ? wake_up_state+0x20/0x20 [1592529.906565] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592529.906567] [] kthread+0xd1/0xe0 [1592529.906569] [] ? insert_kthread_work+0x40/0x40 [1592529.906572] [] ret_from_fork_nospec_begin+0x7/0x21 [1592529.906573] [] ? insert_kthread_work+0x40/0x40 [1592529.906574] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1592533.803415] NMI watchdog: BUG: soft lockup - CPU#0 stuck for 24s! [ldlm_bl_17:147610] [1592533.804691] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592533.804735] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592533.804776] CPU: 0 PID: 147610 Comm: ldlm_bl_17 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592533.804778] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592533.804780] task: ffff9dbf38076180 ti: ffff9daed8ec0000 task.ti: ffff9daed8ec0000 [1592533.804782] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1592533.804792] RSP: 0000:ffff9daed8ec3c70 EFLAGS: 00000246 [1592533.804793] RAX: 0000000000000000 RBX: ffff9da73e3a1400 RCX: 0000000000010000 [1592533.804795] RDX: ffff9dbf3e05b780 RSI: 0000000000a90001 RDI: ffff9da7713d6058 [1592533.804796] RBP: ffff9daed8ec3c70 R08: ffff9daf3e61b780 R09: 0000000000000000 [1592533.804797] R10: 0000000000000000 R11: fffff902d61f6700 R12: ffff9daed8ec3c38 [1592533.804798] R13: ffff9da3e37e2b40 R14: ffff9daa5230c008 R15: ffff9da3e37e2bd8 [1592533.804800] FS: 0000000000000000(0000) GS:ffff9daf3e600000(0000) knlGS:0000000000000000 [1592533.804801] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592533.804803] CR2: 00000000004053f0 CR3: 0000000370242000 CR4: 00000000003607f0 [1592533.804805] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592533.804806] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592533.804807] Call Trace: [1592533.804814] [] queued_spin_lock_slowpath+0xb/0xf [1592533.804820] [] _raw_spin_lock+0x20/0x30 [1592533.804857] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592533.804869] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592533.804899] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592533.804913] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592533.804929] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592533.804945] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592533.804961] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592533.804965] [] ? wake_up_state+0x20/0x20 [1592533.804981] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592533.804984] [] kthread+0xd1/0xe0 [1592533.804986] [] ? insert_kthread_work+0x40/0x40 [1592533.804990] [] ret_from_fork_nospec_begin+0x7/0x21 [1592533.804992] [] ? insert_kthread_work+0x40/0x40 [1592533.804993] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1592533.812412] NMI watchdog: BUG: soft lockup - CPU#5 stuck for 22s! [ldlm_bl_18:147615] [1592533.813354] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592533.813382] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592533.813408] CPU: 5 PID: 147615 Comm: ldlm_bl_18 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592533.813410] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592533.813411] task: ffff9daf3d329040 ti: ffff9da740348000 task.ti: ffff9da740348000 [1592533.813412] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1592533.813417] RSP: 0018:ffff9da74034bc70 EFLAGS: 00000246 [1592533.813418] RAX: 0000000000000000 RBX: ffff9da199515040 RCX: 0000000000290000 [1592533.813419] RDX: ffff9daf3e6db780 RSI: 0000000000190001 RDI: ffff9da7713d6058 [1592533.813420] RBP: ffff9da74034bc70 R08: ffff9daf3e75b780 R09: 0000000000000000 [1592533.813421] R10: 0000000000000000 R11: fffff90329c49380 R12: ffff9da74034bc38 [1592533.813422] R13: ffff9da8b1f18ad0 R14: ffff9dac0d5b2ea8 R15: ffff9da8b1f18b68 [1592533.813424] FS: 0000000000000000(0000) GS:ffff9daf3e740000(0000) knlGS:0000000000000000 [1592533.813425] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592533.813426] CR2: 0000000001015000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1592533.813427] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592533.813428] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592533.813429] Call Trace: [1592533.813432] [] queued_spin_lock_slowpath+0xb/0xf [1592533.813434] [] _raw_spin_lock+0x20/0x30 [1592533.813454] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592533.813463] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592533.813481] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592533.813496] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592533.813511] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592533.813526] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592533.813542] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592533.813544] [] ? wake_up_state+0x20/0x20 [1592533.813560] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592533.813562] [] kthread+0xd1/0xe0 [1592533.813564] [] ? insert_kthread_work+0x40/0x40 [1592533.813566] [] ret_from_fork_nospec_begin+0x7/0x21 [1592533.813568] [] ? insert_kthread_work+0x40/0x40 [1592533.813569] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1592533.814412] NMI watchdog: BUG: soft lockup - CPU#6 stuck for 22s! [ldlm_bl_21:147626] [1592533.815306] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592533.815334] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592533.815360] CPU: 6 PID: 147626 Comm: ldlm_bl_21 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592533.815362] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592533.815363] task: ffff9daf3d37a080 ti: ffff9da918c78000 task.ti: ffff9da918c78000 [1592533.815364] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1592533.815369] RSP: 0018:ffff9da918c7bc70 EFLAGS: 00000246 [1592533.815370] RAX: 0000000000000000 RBX: ffff9da199514b40 RCX: 0000000000310000 [1592533.815371] RDX: ffff9daf3e61b780 RSI: 0000000000010001 RDI: ffff9da7713d6058 [1592533.815372] RBP: ffff9da918c7bc70 R08: ffff9daf3e79b780 R09: 0000000000000000 [1592533.815373] R10: 0000000000000000 R11: fffff9033fd38900 R12: ffff9da918c7bc38 [1592533.815374] R13: ffff9da26209d680 R14: ffff9dbf2ee9f298 R15: ffff9da26209d718 [1592533.815375] FS: 0000000000000000(0000) GS:ffff9daf3e780000(0000) knlGS:0000000000000000 [1592533.815376] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592533.815377] CR2: 000000c0002e5000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1592533.815378] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592533.815379] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592533.815380] Call Trace: [1592533.815383] [] queued_spin_lock_slowpath+0xb/0xf [1592533.815386] [] _raw_spin_lock+0x20/0x30 [1592533.815402] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592533.815409] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592533.815423] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592533.815436] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592533.815450] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592533.815465] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592533.815479] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592533.815482] [] ? wake_up_state+0x20/0x20 [1592533.815495] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592533.815497] [] kthread+0xd1/0xe0 [1592533.815499] [] ? insert_kthread_work+0x40/0x40 [1592533.815501] [] ret_from_fork_nospec_begin+0x7/0x21 [1592533.815503] [] ? insert_kthread_work+0x40/0x40 [1592533.815504] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1592533.892413] NMI watchdog: BUG: soft lockup - CPU#13 stuck for 22s! [ldlm_bl_19:147617] [1592533.893436] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592533.893476] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592533.893517] CPU: 13 PID: 147617 Comm: ldlm_bl_19 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592533.893519] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592533.893521] task: ffff9daf22fe0000 ti: ffff9daf18e40000 task.ti: ffff9daf18e40000 [1592533.893522] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1592533.893530] RSP: 0018:ffff9daf18e43c70 EFLAGS: 00000246 [1592533.893531] RAX: 0000000000000000 RBX: ffff9da73e3a3d40 RCX: 0000000000690000 [1592533.893532] RDX: ffff9dbf3dedb780 RSI: 0000000000790001 RDI: ffff9da7713d6058 [1592533.893533] RBP: ffff9daf18e43c70 R08: ffff9dbf3de5b780 R09: 0000000000000000 [1592533.893534] R10: 0000000000000000 R11: fffff903005b9a00 R12: ffff9daf18e43c38 [1592533.893535] R13: ffff9db02eb9ab40 R14: ffff9dbf39798908 R15: ffff9db02eb9abd8 [1592533.893537] FS: 0000000000000000(0000) GS:ffff9dbf3de40000(0000) knlGS:0000000000000000 [1592533.893538] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592533.893539] CR2: 0000563642689bb0 CR3: 000000153ac10000 CR4: 00000000003607e0 [1592533.893541] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592533.893542] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592533.893542] Call Trace: [1592533.893549] [] queued_spin_lock_slowpath+0xb/0xf [1592533.893553] [] _raw_spin_lock+0x20/0x30 [1592533.893584] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592533.893594] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592533.893620] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592533.893634] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592533.893649] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592533.893664] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592533.893679] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592533.893682] [] ? wake_up_state+0x20/0x20 [1592533.893696] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592533.893699] [] kthread+0xd1/0xe0 [1592533.893701] [] ? insert_kthread_work+0x40/0x40 [1592533.893704] [] ret_from_fork_nospec_begin+0x7/0x21 [1592533.893706] [] ? insert_kthread_work+0x40/0x40 [1592533.893707] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1592533.907413] NMI watchdog: BUG: soft lockup - CPU#19 stuck for 22s! [ldlm_bl_16:147609] [1592533.908306] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592533.908336] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592533.908367] CPU: 19 PID: 147609 Comm: ldlm_bl_16 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592533.908369] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592533.908371] task: ffff9dbf38072080 ti: ffff9dad4a704000 task.ti: ffff9dad4a704000 [1592533.908372] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1592533.908377] RSP: 0018:ffff9dad4a707c70 EFLAGS: 00000246 [1592533.908378] RAX: 0000000000000000 RBX: ffff9da892d026c0 RCX: 0000000000990000 [1592533.908380] RDX: ffff9dbf3dedb780 RSI: 0000000000790001 RDI: ffff9da7713d6058 [1592533.908381] RBP: ffff9dad4a707c70 R08: ffff9dbf3dfdb780 R09: 0000000000000000 [1592533.908382] R10: 0000000000000000 R11: fffff902ca6cc480 R12: ffff9dad4a707c38 [1592533.908383] R13: ffff9db2f5bd0000 R14: ffff9dbf3d7385a8 R15: ffff9db2f5bd0098 [1592533.908384] FS: 0000000000000000(0000) GS:ffff9dbf3dfc0000(0000) knlGS:0000000000000000 [1592533.908385] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592533.908386] CR2: 00007fc89dcb2000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1592533.908387] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592533.908388] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592533.908389] Call Trace: [1592533.908393] [] queued_spin_lock_slowpath+0xb/0xf [1592533.908396] [] _raw_spin_lock+0x20/0x30 [1592533.908418] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592533.908426] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592533.908444] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592533.908458] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592533.908472] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592533.908487] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592533.908501] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592533.908503] [] ? wake_up_state+0x20/0x20 [1592533.908517] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592533.908519] [] kthread+0xd1/0xe0 [1592533.908521] [] ? insert_kthread_work+0x40/0x40 [1592533.908523] [] ret_from_fork_nospec_begin+0x7/0x21 [1592533.908525] [] ? insert_kthread_work+0x40/0x40 [1592533.908526] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1592533.910412] NMI watchdog: BUG: soft lockup - CPU#20 stuck for 22s! [ldlm_bl_01:146711] [1592533.911409] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592533.911436] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592533.911464] CPU: 20 PID: 146711 Comm: ldlm_bl_01 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592533.911466] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592533.911467] task: ffff9db43a7e1040 ti: ffff9daed9efc000 task.ti: ffff9daed9efc000 [1592533.911468] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1592533.911473] RSP: 0018:ffff9daed9effc70 EFLAGS: 00000246 [1592533.911474] RAX: 0000000000000000 RBX: ffff9daa5b2c5040 RCX: 0000000000a10000 [1592533.911475] RDX: ffff9daf3e89b780 RSI: 0000000000510001 RDI: ffff9da7713d6058 [1592533.911477] RBP: ffff9daed9effc70 R08: ffff9dbf3e01b780 R09: 0000000000000000 [1592533.911478] R10: 0000000000000000 R11: ffffffffffffff9c R12: ffff9daed9effc38 [1592533.911479] R13: ffff9db9f1b53610 R14: ffff9dbc62b9d3b8 R15: ffff9db9f1b536a8 [1592533.911480] FS: 0000000000000000(0000) GS:ffff9dbf3e000000(0000) knlGS:0000000000000000 [1592533.911482] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592533.911483] CR2: 00007f8d12f32000 CR3: 0000001f30b18000 CR4: 00000000003607e0 [1592533.911484] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592533.911485] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592533.911486] Call Trace: [1592533.911489] [] queued_spin_lock_slowpath+0xb/0xf [1592533.911492] [] _raw_spin_lock+0x20/0x30 [1592533.911509] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592533.911516] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592533.911529] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592533.911543] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592533.911558] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592533.911573] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592533.911589] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592533.911591] [] ? wake_up_state+0x20/0x20 [1592533.911606] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592533.911608] [] kthread+0xd1/0xe0 [1592533.911611] [] ? insert_kthread_work+0x40/0x40 [1592533.911613] [] ret_from_fork_nospec_begin+0x7/0x21 [1592533.911615] [] ? insert_kthread_work+0x40/0x40 [1592533.911616] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1592533.917412] NMI watchdog: BUG: soft lockup - CPU#23 stuck for 22s! [ldlm_bl_09:147595] [1592533.918388] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592533.918416] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592533.918443] CPU: 23 PID: 147595 Comm: ldlm_bl_09 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592533.918444] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592533.918446] task: ffff9daf17f3e180 ti: ffff9da94f774000 task.ti: ffff9da94f774000 [1592533.918447] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1592533.918452] RSP: 0018:ffff9da94f777c70 EFLAGS: 00000246 [1592533.918453] RAX: 0000000000000000 RBX: ffff9da73e3a2bc0 RCX: 0000000000b90000 [1592533.918454] RDX: ffff9daf3e69b780 RSI: 0000000000110001 RDI: ffff9da7713d6058 [1592533.918455] RBP: ffff9da94f777c70 R08: ffff9dbf3e0db780 R09: 0000000000000000 [1592533.918457] R10: 0000000000000000 R11: fffff902da9fca80 R12: ffff9da94f777c38 [1592533.918458] R13: ffff9db775236150 R14: ffff9db4107fa908 R15: ffff9db7752361e8 [1592533.918459] FS: 0000000000000000(0000) GS:ffff9dbf3e0c0000(0000) knlGS:0000000000000000 [1592533.918461] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592533.918462] CR2: 0000555650e4c082 CR3: 0000001ff9610000 CR4: 00000000003607e0 [1592533.918463] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592533.918464] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592533.918465] Call Trace: [1592533.918468] [] queued_spin_lock_slowpath+0xb/0xf [1592533.918471] [] _raw_spin_lock+0x20/0x30 [1592533.918488] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592533.918495] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592533.918509] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592533.918523] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592533.918539] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592533.918554] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592533.918569] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592533.918571] [] ? wake_up_state+0x20/0x20 [1592533.918587] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592533.918589] [] kthread+0xd1/0xe0 [1592533.918592] [] ? finish_task_switch+0x57/0x1c0 [1592533.918594] [] ? insert_kthread_work+0x40/0x40 [1592533.918596] [] ret_from_fork_nospec_begin+0x7/0x21 [1592533.918598] [] ? insert_kthread_work+0x40/0x40 [1592533.918599] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1592545.808469] NMI watchdog: BUG: soft lockup - CPU#3 stuck for 22s! [ldlm_bl_30:148146] [1592545.809536] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592545.809576] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592545.809618] CPU: 3 PID: 148146 Comm: ldlm_bl_30 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592545.809619] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592545.809621] task: ffff9daf17be2080 ti: ffff9da2b023c000 task.ti: ffff9da2b023c000 [1592545.809623] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1592545.809633] RSP: 0018:ffff9da2b023fc70 EFLAGS: 00000246 [1592545.809634] RAX: 0000000000000000 RBX: ffff9da199516e40 RCX: 0000000000190000 [1592545.809635] RDX: ffff9daf3e65b780 RSI: 0000000000090001 RDI: ffff9da7713d6058 [1592545.809637] RBP: ffff9da2b023fc70 R08: ffff9daf3e6db780 R09: 0000000000000000 [1592545.809638] R10: 0000000000000000 R11: fffff902f5467300 R12: ffff9da2b023fc38 [1592545.809639] R13: ffff9da315f12070 R14: ffff9da1cb2d7838 R15: ffff9da315f12108 [1592545.809641] FS: 0000000000000000(0000) GS:ffff9daf3e6c0000(0000) knlGS:0000000000000000 [1592545.809642] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592545.809643] CR2: 000000c0009c3000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1592545.809645] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592545.809646] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592545.809647] Call Trace: [1592545.809654] [] queued_spin_lock_slowpath+0xb/0xf [1592545.809659] [] _raw_spin_lock+0x20/0x30 [1592545.809697] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592545.809709] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592545.809737] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592545.809752] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592545.809768] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592545.809784] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592545.809800] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592545.809804] [] ? wake_up_state+0x20/0x20 [1592545.809819] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592545.809824] [] kthread+0xd1/0xe0 [1592545.809826] [] ? insert_kthread_work+0x40/0x40 [1592545.809830] [] ret_from_fork_nospec_begin+0x7/0x21 [1592545.809832] [] ? insert_kthread_work+0x40/0x40 [1592545.809833] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1592546.654405] LustreError: 11-0: ai400-OST0003-osc-ffff9dbf39146800: operation ldlm_enqueue to node 10.0.10.176@o2ib10 failed: rc = -107 [1592546.656145] Lustre: ai400-OST0003-osc-ffff9dbf39146800: Connection to ai400-OST0003 (at 10.0.10.176@o2ib10) was lost; in progress operations using this service will wait for recovery to complete [1592546.657997] LustreError: 167-0: ai400-OST0003-osc-ffff9dbf39146800: This client was evicted by ai400-OST0003; in progress operations using this service will fail. [1592553.826506] NMI watchdog: BUG: soft lockup - CPU#11 stuck for 22s! [ldlm_bl_04:147590] [1592553.827456] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592553.827497] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592553.827539] CPU: 11 PID: 147590 Comm: ldlm_bl_04 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592553.827541] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592553.827543] task: ffff9daf086c8000 ti: ffff9da601f90000 task.ti: ffff9da601f90000 [1592553.827544] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1592553.827554] RSP: 0000:ffff9da601f93c70 EFLAGS: 00000246 [1592553.827555] RAX: 0000000000000000 RBX: ffff9da73e3a1900 RCX: 0000000000590000 [1592553.827556] RDX: ffff9dbf3e05b780 RSI: 0000000000a90001 RDI: ffff9da7713d6058 [1592553.827557] RBP: ffff9da601f93c70 R08: ffff9daf3e8db780 R09: 0000000000000000 [1592553.827558] R10: 0000000000000000 R11: fffff902f8e6ec80 R12: ffff9da601f93c38 [1592553.827559] R13: ffff9da3e8f03610 R14: ffff9da282eb6128 R15: ffff9da3e8f036a8 [1592553.827561] FS: 0000000000000000(0000) GS:ffff9daf3e8c0000(0000) knlGS:0000000000000000 [1592553.827562] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592553.827563] CR2: 00007fe55de96f94 CR3: 0000000370242000 CR4: 00000000003607e0 [1592553.827564] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592553.827566] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592553.827566] Call Trace: [1592553.827573] [] queued_spin_lock_slowpath+0xb/0xf [1592553.827579] [] _raw_spin_lock+0x20/0x30 [1592553.827617] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592553.827629] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592553.827656] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592553.827670] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592553.827686] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592553.827701] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592553.827715] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592553.827719] [] ? wake_up_state+0x20/0x20 [1592553.827733] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592553.827736] [] kthread+0xd1/0xe0 [1592553.827738] [] ? insert_kthread_work+0x40/0x40 [1592553.827741] [] ret_from_fork_nospec_begin+0x7/0x21 [1592553.827743] [] ? insert_kthread_work+0x40/0x40 [1592553.827744] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1592557.905523] NMI watchdog: BUG: soft lockup - CPU#18 stuck for 23s! [ldlm_bl_08:147594] [1592557.906671] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592557.906710] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592557.906753] CPU: 18 PID: 147594 Comm: ldlm_bl_08 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592557.906754] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592557.906756] task: ffff9daf17f3c100 ti: ffff9daa70b7c000 task.ti: ffff9daa70b7c000 [1592557.906758] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1592557.906766] RSP: 0018:ffff9daa70b7fc70 EFLAGS: 00000246 [1592557.906767] RAX: 0000000000000000 RBX: ffff9daa5b2c5680 RCX: 0000000000910000 [1592557.906768] RDX: ffff9daf3e8db780 RSI: 0000000000590001 RDI: ffff9da7713d6058 [1592557.906769] RBP: ffff9daa70b7fc70 R08: ffff9dbf3df9b780 R09: 0000000000000000 [1592557.906770] R10: 0000000000000000 R11: 0000000000000000 R12: ffff9daa70b7fc38 [1592557.906772] R13: ffff9db91a33c0e0 R14: ffff9db8d7783058 R15: ffff9db91a33c178 [1592557.906773] FS: 0000000000000000(0000) GS:ffff9dbf3df80000(0000) knlGS:0000000000000000 [1592557.906774] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592557.906776] CR2: 0000555650b9b0c0 CR3: 0000001901220000 CR4: 00000000003607e0 [1592557.906777] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592557.906778] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592557.906780] Call Trace: [1592557.906785] [] queued_spin_lock_slowpath+0xb/0xf [1592557.906790] [] _raw_spin_lock+0x20/0x30 [1592557.906820] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592557.906830] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592557.906855] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592557.906869] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592557.906885] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592557.906901] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592557.906917] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592557.906920] [] ? wake_up_state+0x20/0x20 [1592557.906936] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592557.906938] [] kthread+0xd1/0xe0 [1592557.906940] [] ? insert_kthread_work+0x40/0x40 [1592557.906943] [] ret_from_fork_nospec_begin+0x7/0x21 [1592557.906945] [] ? insert_kthread_work+0x40/0x40 [1592557.906946] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1592561.803542] NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [ldlm_bl_17:147610] [1592561.804475] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592561.804513] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592561.804568] CPU: 0 PID: 147610 Comm: ldlm_bl_17 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592561.804569] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592561.804571] task: ffff9dbf38076180 ti: ffff9daed8ec0000 task.ti: ffff9daed8ec0000 [1592561.804572] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1592561.804582] RSP: 0000:ffff9daed8ec3c70 EFLAGS: 00000246 [1592561.804583] RAX: 0000000000000000 RBX: ffff9da1995143c0 RCX: 0000000000010000 [1592561.804584] RDX: ffff9dbf3e05b780 RSI: 0000000000a90001 RDI: ffff9da7713d6058 [1592561.804585] RBP: ffff9daed8ec3c70 R08: ffff9daf3e61b780 R09: 0000000000000000 [1592561.804587] R10: 0000000000000000 R11: fffff9032d02e880 R12: ffff9daed8ec3c38 [1592561.804588] R13: ffff9da3e37e2b40 R14: ffff9daa5230c008 R15: ffff9da3e37e2bd8 [1592561.804589] FS: 0000000000000000(0000) GS:ffff9daf3e600000(0000) knlGS:0000000000000000 [1592561.804590] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592561.804591] CR2: 00000000004053f0 CR3: 000000153ac10000 CR4: 00000000003607f0 [1592561.804593] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592561.804594] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592561.804595] Call Trace: [1592561.804602] [] queued_spin_lock_slowpath+0xb/0xf [1592561.804607] [] _raw_spin_lock+0x20/0x30 [1592561.804645] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592561.804658] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592561.804686] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592561.804701] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592561.804717] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592561.804733] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592561.804749] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592561.804753] [] ? wake_up_state+0x20/0x20 [1592561.804769] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592561.804772] [] kthread+0xd1/0xe0 [1592561.804774] [] ? insert_kthread_work+0x40/0x40 [1592561.804778] [] ret_from_fork_nospec_begin+0x7/0x21 [1592561.804780] [] ? insert_kthread_work+0x40/0x40 [1592561.804781] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1592561.814540] NMI watchdog: BUG: soft lockup - CPU#6 stuck for 22s! [ldlm_bl_21:147626] [1592561.815288] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592561.815312] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592561.815336] CPU: 6 PID: 147626 Comm: ldlm_bl_21 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592561.815338] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592561.815339] task: ffff9daf3d37a080 ti: ffff9da918c78000 task.ti: ffff9da918c78000 [1592561.815340] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1592561.815344] RSP: 0018:ffff9da918c7bc70 EFLAGS: 00000246 [1592561.815345] RAX: 0000000000000000 RBX: ffff9daa5b2c5900 RCX: 0000000000310000 [1592561.815346] RDX: ffff9dbf3dfdb780 RSI: 0000000000990001 RDI: ffff9da7713d6058 [1592561.815347] RBP: ffff9da918c7bc70 R08: ffff9daf3e79b780 R09: 0000000000000000 [1592561.815348] R10: 0000000000000000 R11: fffff90339c4c700 R12: ffff9da918c7bc38 [1592561.815349] R13: ffff9da26209d680 R14: ffff9dbf2ee9f298 R15: ffff9da26209d718 [1592561.815350] FS: 0000000000000000(0000) GS:ffff9daf3e780000(0000) knlGS:0000000000000000 [1592561.815352] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592561.815352] CR2: 000000c0002e5000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1592561.815353] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592561.815354] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592561.815355] Call Trace: [1592561.815358] [] queued_spin_lock_slowpath+0xb/0xf [1592561.815360] [] _raw_spin_lock+0x20/0x30 [1592561.815376] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592561.815384] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592561.815397] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592561.815409] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592561.815423] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592561.815436] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592561.815449] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592561.815451] [] ? wake_up_state+0x20/0x20 [1592561.815464] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592561.815465] [] kthread+0xd1/0xe0 [1592561.815467] [] ? insert_kthread_work+0x40/0x40 [1592561.815469] [] ret_from_fork_nospec_begin+0x7/0x21 [1592561.815471] [] ? insert_kthread_work+0x40/0x40 [1592561.815472] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1592561.892541] NMI watchdog: BUG: soft lockup - CPU#13 stuck for 22s! [ldlm_bl_19:147617] [1592561.893589] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592561.893629] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592561.893671] CPU: 13 PID: 147617 Comm: ldlm_bl_19 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592561.893672] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592561.893674] task: ffff9daf22fe0000 ti: ffff9daf18e40000 task.ti: ffff9daf18e40000 [1592561.893676] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x154/0x200 [1592561.893684] RSP: 0018:ffff9daf18e43c70 EFLAGS: 00000202 [1592561.893685] RAX: 0000000000000001 RBX: ffff9da892d02bc0 RCX: 0000000000690000 [1592561.893686] RDX: 0000000000310001 RSI: 0000000000a90001 RDI: ffff9da7713d6058 [1592561.893688] RBP: ffff9daf18e43c70 R08: ffff9dbf3de5b780 R09: ffff9dbf3df5b780 [1592561.893689] R10: 0000000000000000 R11: fffff90337d8ea00 R12: ffff9daf18e43c38 [1592561.893690] R13: ffff9db02eb9ab40 R14: ffff9dbf39798908 R15: ffff9db02eb9abd8 [1592561.893692] FS: 0000000000000000(0000) GS:ffff9dbf3de40000(0000) knlGS:0000000000000000 [1592561.893693] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592561.893694] CR2: 0000563642689bb0 CR3: 000000153ac10000 CR4: 00000000003607e0 [1592561.893696] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592561.893697] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592561.893698] Call Trace: [1592561.893703] [] queued_spin_lock_slowpath+0xb/0xf [1592561.893708] [] _raw_spin_lock+0x20/0x30 [1592561.893740] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592561.893750] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592561.893777] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592561.893792] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592561.893807] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592561.893823] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592561.893839] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592561.893842] [] ? wake_up_state+0x20/0x20 [1592561.893858] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592561.893861] [] kthread+0xd1/0xe0 [1592561.893863] [] ? insert_kthread_work+0x40/0x40 [1592561.893867] [] ret_from_fork_nospec_begin+0x7/0x21 [1592561.893869] [] ? insert_kthread_work+0x40/0x40 [1592561.893870] Code: f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 85 c0 74 21 83 f8 03 75 10 eb 1a 66 2e 0f 1f 84 00 00 00 00 00 85 c0 74 0c 90 8b 17 0f b7 c2 83 f8 03 75 f0 be 01 00 00 00 eb 15 66 0f [1592561.907541] NMI watchdog: BUG: soft lockup - CPU#19 stuck for 22s! [ldlm_bl_16:147609] [1592561.908423] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592561.908454] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592561.908486] CPU: 19 PID: 147609 Comm: ldlm_bl_16 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592561.908488] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592561.908490] task: ffff9dbf38072080 ti: ffff9dad4a704000 task.ti: ffff9dad4a704000 [1592561.908491] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x120/0x200 [1592561.908497] RSP: 0018:ffff9dad4a707c70 EFLAGS: 00000246 [1592561.908498] RAX: 0000000000000000 RBX: ffff9da892d008c0 RCX: 0000000000990000 [1592561.908499] RDX: ffff9dbf3e0db780 RSI: 0000000000b90001 RDI: ffff9da7713d6058 [1592561.908500] RBP: ffff9dad4a707c70 R08: ffff9dbf3dfdb780 R09: 0000000000000000 [1592561.908501] R10: 0000000000000000 R11: fffff9033634ad80 R12: ffff9dad4a707c38 [1592561.908503] R13: ffff9db2f5bd0000 R14: ffff9dbf3d7385a8 R15: ffff9db2f5bd0098 [1592561.908504] FS: 0000000000000000(0000) GS:ffff9dbf3dfc0000(0000) knlGS:0000000000000000 [1592561.908505] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592561.908507] CR2: 00007fc89dcb2000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1592561.908508] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592561.908509] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592561.908510] Call Trace: [1592561.908513] [] queued_spin_lock_slowpath+0xb/0xf [1592561.908517] [] _raw_spin_lock+0x20/0x30 [1592561.908539] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592561.908548] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592561.908570] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592561.908584] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592561.908599] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592561.908614] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592561.908630] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592561.908632] [] ? wake_up_state+0x20/0x20 [1592561.908648] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592561.908650] [] kthread+0xd1/0xe0 [1592561.908652] [] ? insert_kthread_work+0x40/0x40 [1592561.908655] [] ret_from_fork_nospec_begin+0x7/0x21 [1592561.908657] [] ? insert_kthread_work+0x40/0x40 [1592561.908658] Code: c1 e8 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 90 41 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 [1592561.910540] NMI watchdog: BUG: soft lockup - CPU#20 stuck for 22s! [ldlm_bl_01:146711] [1592561.911320] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592561.911347] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592561.911374] CPU: 20 PID: 146711 Comm: ldlm_bl_01 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592561.911376] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592561.911377] task: ffff9db43a7e1040 ti: ffff9daed9efc000 task.ti: ffff9daed9efc000 [1592561.911379] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1592561.911383] RSP: 0018:ffff9daed9effc70 EFLAGS: 00000246 [1592561.911384] RAX: 0000000000000000 RBX: ffff9da199514280 RCX: 0000000000a10000 [1592561.911385] RDX: ffff9dbf3df9b780 RSI: 0000000000910001 RDI: ffff9da7713d6058 [1592561.911387] RBP: ffff9daed9effc70 R08: ffff9dbf3e01b780 R09: 0000000000000000 [1592561.911388] R10: 0000000000000000 R11: fffff902f5e42200 R12: ffff9daed9effc38 [1592561.911389] R13: ffff9db9f1b53610 R14: ffff9dbc62b9d3b8 R15: ffff9db9f1b536a8 [1592561.911390] FS: 0000000000000000(0000) GS:ffff9dbf3e000000(0000) knlGS:0000000000000000 [1592561.911392] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592561.911393] CR2: 00007f8d12f32000 CR3: 0000001f30b18000 CR4: 00000000003607e0 [1592561.911394] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592561.911395] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592561.911396] Call Trace: [1592561.911399] [] queued_spin_lock_slowpath+0xb/0xf [1592561.911402] [] _raw_spin_lock+0x20/0x30 [1592561.911419] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592561.911426] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592561.911440] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592561.911454] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592561.911469] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592561.911484] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592561.911499] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592561.911502] [] ? wake_up_state+0x20/0x20 [1592561.911517] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592561.911519] [] kthread+0xd1/0xe0 [1592561.911521] [] ? insert_kthread_work+0x40/0x40 [1592561.911524] [] ret_from_fork_nospec_begin+0x7/0x21 [1592561.911526] [] ? insert_kthread_work+0x40/0x40 [1592561.911526] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1592561.917541] NMI watchdog: BUG: soft lockup - CPU#23 stuck for 22s! [ldlm_bl_09:147595] [1592561.918171] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592561.918198] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592561.918226] CPU: 23 PID: 147595 Comm: ldlm_bl_09 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592561.918228] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592561.918229] task: ffff9daf17f3e180 ti: ffff9da94f774000 task.ti: ffff9da94f774000 [1592561.918230] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1592561.918235] RSP: 0018:ffff9da94f777c70 EFLAGS: 00000246 [1592561.918236] RAX: 0000000000000000 RBX: ffff9daf36ba5400 RCX: 0000000000b90000 [1592561.918237] RDX: ffff9daf3e8db780 RSI: 0000000000590001 RDI: ffff9da7713d6058 [1592561.918238] RBP: ffff9da94f777c70 R08: ffff9dbf3e0db780 R09: 0000000000000000 [1592561.918239] R10: 0000000000000000 R11: fffff902fc7c7400 R12: ffff9da94f777c38 [1592561.918240] R13: ffff9db775236150 R14: ffff9db4107fa908 R15: ffff9db7752361e8 [1592561.918241] FS: 0000000000000000(0000) GS:ffff9dbf3e0c0000(0000) knlGS:0000000000000000 [1592561.918242] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592561.918243] CR2: 0000555650e4c082 CR3: 0000001ff9610000 CR4: 00000000003607e0 [1592561.918244] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592561.918245] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592561.918246] Call Trace: [1592561.918249] [] queued_spin_lock_slowpath+0xb/0xf [1592561.918252] [] _raw_spin_lock+0x20/0x30 [1592561.918270] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592561.918278] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592561.918296] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592561.918310] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592561.918325] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592561.918339] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592561.918353] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592561.918356] [] ? wake_up_state+0x20/0x20 [1592561.918369] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592561.918371] [] kthread+0xd1/0xe0 [1592561.918374] [] ? finish_task_switch+0x57/0x1c0 [1592561.918376] [] ? insert_kthread_work+0x40/0x40 [1592561.918378] [] ret_from_fork_nospec_begin+0x7/0x21 [1592561.918380] [] ? insert_kthread_work+0x40/0x40 [1592561.918381] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1592581.826642] NMI watchdog: BUG: soft lockup - CPU#11 stuck for 22s! [ldlm_bl_04:147590] [1592581.827398] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1592581.827435] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1592581.827472] CPU: 11 PID: 147590 Comm: ldlm_bl_04 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1592581.827474] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1592581.827475] task: ffff9daf086c8000 ti: ffff9da601f90000 task.ti: ffff9da601f90000 [1592581.827477] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1592581.827486] RSP: 0000:ffff9da601f93c70 EFLAGS: 00000246 [1592581.827487] RAX: 0000000000000000 RBX: ffff9da73e3a1900 RCX: 0000000000590000 [1592581.827488] RDX: ffff9daf3e79b780 RSI: 0000000000310001 RDI: ffff9da7713d6058 [1592581.827489] RBP: ffff9da601f93c70 R08: ffff9daf3e8db780 R09: 0000000000000000 [1592581.827490] R10: 0000000000000000 R11: fffff9031f9d8780 R12: ffff9da601f93c38 [1592581.827491] R13: ffff9da3e8f03610 R14: ffff9da282eb6128 R15: ffff9da3e8f036a8 [1592581.827492] FS: 0000000000000000(0000) GS:ffff9daf3e8c0000(0000) knlGS:0000000000000000 [1592581.827493] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1592581.827494] CR2: 00007fe55de96f94 CR3: 000000153ac10000 CR4: 00000000003607e0 [1592581.827495] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1592581.827496] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1592581.827496] Call Trace: [1592581.827503] [] queued_spin_lock_slowpath+0xb/0xf [1592581.827508] [] _raw_spin_lock+0x20/0x30 [1592581.827546] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1592581.827558] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1592581.827584] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1592581.827598] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1592581.827613] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1592581.827627] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1592581.827641] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1592581.827644] [] ? wake_up_state+0x20/0x20 [1592581.827675] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1592581.827679] [] kthread+0xd1/0xe0 [1592581.827681] [] ? insert_kthread_work+0x40/0x40 [1592581.827684] [] ret_from_fork_nospec_begin+0x7/0x21 [1592581.827686] [] ? insert_kthread_work+0x40/0x40 [1592581.827687] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1592589.432665] Lustre: ai400-OST0003-osc-ffff9dbf39146800: Connection restored to 10.0.10.176@o2ib10 (at 10.0.10.176@o2ib10) [1592589.432670] Lustre: Skipped 2 previous similar messages [1593753.810011] NMI watchdog: BUG: soft lockup - CPU#4 stuck for 22s! [ldlm_bl_77:150644] [1593753.810758] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593753.810799] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593753.810839] CPU: 4 PID: 150644 Comm: ldlm_bl_77 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593753.810841] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593753.810843] task: ffff9dafe90e9040 ti: ffff9dbf327d8000 task.ti: ffff9dbf327d8000 [1593753.810844] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1593753.810854] RSP: 0000:ffff9dbf327dbc70 EFLAGS: 00000246 [1593753.810855] RAX: 0000000000000000 RBX: ffff9dad77f2e300 RCX: 0000000000210000 [1593753.810856] RDX: ffff9daf3e79b780 RSI: 0000000000310001 RDI: ffff9da30dea8058 [1593753.810857] RBP: ffff9dbf327dbc70 R08: ffff9daf3e71b780 R09: 0000000000000000 [1593753.810858] R10: 0000000000000000 R11: fffff9033377e780 R12: ffff9dbf327dbc38 [1593753.810859] R13: ffff9daf09ee2b40 R14: ffff9daf3a21fb08 R15: ffff9daf09ee2bd8 [1593753.810861] FS: 0000000000000000(0000) GS:ffff9daf3e700000(0000) knlGS:0000000000000000 [1593753.810862] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593753.810863] CR2: 00007f0a72ab8f94 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593753.810865] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593753.810865] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593753.810867] Call Trace: [1593753.810873] [] queued_spin_lock_slowpath+0xb/0xf [1593753.810879] [] _raw_spin_lock+0x20/0x30 [1593753.810915] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593753.810927] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593753.810957] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593753.810971] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593753.810987] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593753.811002] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593753.811017] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593753.811020] [] ? wake_up_state+0x20/0x20 [1593753.811034] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593753.811037] [] kthread+0xd1/0xe0 [1593753.811039] [] ? insert_kthread_work+0x40/0x40 [1593753.811042] [] ret_from_fork_nospec_begin+0x7/0x21 [1593753.811044] [] ? insert_kthread_work+0x40/0x40 [1593753.811045] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1593769.905931] NMI watchdog: BUG: soft lockup - CPU#18 stuck for 22s! [ldlm_bl_74:150641] [1593769.906850] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593769.906892] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593769.906933] CPU: 18 PID: 150641 Comm: ldlm_bl_74 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593769.906935] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593769.906937] task: ffff9dbf38238000 ti: ffff9db7d221c000 task.ti: ffff9db7d221c000 [1593769.906938] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1593769.906947] RSP: 0018:ffff9db7d221fc70 EFLAGS: 00000246 [1593769.906948] RAX: 0000000000000000 RBX: ffff9daf1badb340 RCX: 0000000000910000 [1593769.906949] RDX: ffff9dbf3e05b780 RSI: 0000000000a90001 RDI: ffff9da30dea8058 [1593769.906950] RBP: ffff9db7d221fc70 R08: ffff9dbf3df9b780 R09: 0000000000000000 [1593769.906951] R10: 0000000000000000 R11: fffff902ed806b00 R12: ffff9db7d221fc38 [1593769.906953] R13: ffff9dbefa242070 R14: ffff9dbf3a7c15f8 R15: ffff9dbefa242108 [1593769.906954] FS: 0000000000000000(0000) GS:ffff9dbf3df80000(0000) knlGS:0000000000000000 [1593769.906956] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593769.906957] CR2: 00007f57c0025008 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593769.906958] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593769.906959] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593769.906960] Call Trace: [1593769.906966] [] queued_spin_lock_slowpath+0xb/0xf [1593769.906971] [] _raw_spin_lock+0x20/0x30 [1593769.907003] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593769.907014] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593769.907041] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593769.907055] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593769.907070] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593769.907086] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593769.907102] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593769.907105] [] ? wake_up_state+0x20/0x20 [1593769.907120] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593769.907123] [] kthread+0xd1/0xe0 [1593769.907125] [] ? insert_kthread_work+0x40/0x40 [1593769.907128] [] ret_from_fork_nospec_begin+0x7/0x21 [1593769.907130] [] ? insert_kthread_work+0x40/0x40 [1593769.907131] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1593781.811314] NMI watchdog: BUG: soft lockup - CPU#4 stuck for 22s! [ldlm_bl_77:150644] [1593781.812052] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593781.812090] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593781.812130] CPU: 4 PID: 150644 Comm: ldlm_bl_77 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593781.812132] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593781.812134] task: ffff9dafe90e9040 ti: ffff9dbf327d8000 task.ti: ffff9dbf327d8000 [1593781.812135] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1593781.812144] RSP: 0000:ffff9dbf327dbc70 EFLAGS: 00000246 [1593781.812146] RAX: 0000000000000000 RBX: ffff9daec4dcee40 RCX: 0000000000210000 [1593781.812147] RDX: ffff9dbf3de1b780 RSI: 0000000000610001 RDI: ffff9da30dea8058 [1593781.812148] RBP: ffff9dbf327dbc70 R08: ffff9daf3e71b780 R09: 0000000000000000 [1593781.812149] R10: 0000000000000000 R11: fffff902f5281980 R12: ffff9dbf327dbc38 [1593781.812150] R13: ffff9daf09ee2b40 R14: ffff9daf3a21fb08 R15: ffff9daf09ee2bd8 [1593781.812151] FS: 0000000000000000(0000) GS:ffff9daf3e700000(0000) knlGS:0000000000000000 [1593781.812152] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593781.812153] CR2: 00007f0a72ab8f94 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593781.812155] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593781.812156] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593781.812157] Call Trace: [1593781.812164] [] queued_spin_lock_slowpath+0xb/0xf [1593781.812169] [] _raw_spin_lock+0x20/0x30 [1593781.812207] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593781.812219] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593781.812246] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593781.812261] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593781.812277] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593781.812292] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593781.812306] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593781.812310] [] ? wake_up_state+0x20/0x20 [1593781.812324] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593781.812327] [] kthread+0xd1/0xe0 [1593781.812329] [] ? insert_kthread_work+0x40/0x40 [1593781.812332] [] ret_from_fork_nospec_begin+0x7/0x21 [1593781.812334] [] ? insert_kthread_work+0x40/0x40 [1593781.812335] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1593797.906356] NMI watchdog: BUG: soft lockup - CPU#18 stuck for 22s! [ldlm_bl_74:150641] [1593797.907276] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593797.907316] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593797.907357] CPU: 18 PID: 150641 Comm: ldlm_bl_74 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593797.907358] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593797.907360] task: ffff9dbf38238000 ti: ffff9db7d221c000 task.ti: ffff9db7d221c000 [1593797.907361] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1593797.907369] RSP: 0018:ffff9db7d221fc70 EFLAGS: 00000246 [1593797.907371] RAX: 0000000000000000 RBX: ffff9daec4dcfac0 RCX: 0000000000910000 [1593797.907372] RDX: ffff9daf3e81b780 RSI: 0000000000410001 RDI: ffff9da30dea8058 [1593797.907373] RBP: ffff9db7d221fc70 R08: ffff9dbf3df9b780 R09: 0000000000000000 [1593797.907374] R10: 0000000000000000 R11: fffff9032c9d9000 R12: ffff9db7d221fc38 [1593797.907375] R13: ffff9dbefa242070 R14: ffff9dbf3a7c15f8 R15: ffff9dbefa242108 [1593797.907377] FS: 0000000000000000(0000) GS:ffff9dbf3df80000(0000) knlGS:0000000000000000 [1593797.907378] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593797.907379] CR2: 00007f57c0025008 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593797.907381] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593797.907382] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593797.907383] Call Trace: [1593797.907388] [] queued_spin_lock_slowpath+0xb/0xf [1593797.907393] [] _raw_spin_lock+0x20/0x30 [1593797.907424] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593797.907434] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593797.907461] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593797.907475] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593797.907491] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593797.907507] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593797.907522] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593797.907526] [] ? wake_up_state+0x20/0x20 [1593797.907541] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593797.907544] [] kthread+0xd1/0xe0 [1593797.907546] [] ? insert_kthread_work+0x40/0x40 [1593797.907549] [] ret_from_fork_nospec_begin+0x7/0x21 [1593797.907551] [] ? insert_kthread_work+0x40/0x40 [1593797.907552] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1593804.741373] INFO: rcu_sched self-detected stall on CPU { 18} (t=60000 jiffies g=21011083 c=21011082 q=288193) [1593804.742338] Task dump for CPU 4: [1593804.742340] ldlm_bl_77 R running task 0 150644 2 0x00000088 [1593804.742343] Call Trace: [1593804.742350] [] ? __schedule+0x42a/0x860 [1593804.742355] [] ? from_kgid+0x12/0x20 [1593804.742394] [] ? get_my_ctx+0x69/0x140 [ptlrpc] [1593804.742422] [] ? sptlrpc_import_check_ctx+0x1ed/0x3b0 [ptlrpc] [1593804.742435] [] ? ldlm_lock_match_with_skip+0x3a8/0x860 [ptlrpc] [1593804.742439] [] ? call_rcu_sched+0x1d/0x20 [1593804.742444] [] ? __radix_tree_delete_node+0x4f/0x170 [1593804.742448] [] ? free_pcppages_bulk+0x17e/0x3a0 [1593804.742451] [] ? radix_tree_next_chunk+0x157/0x2d0 [1593804.742453] [] ? radix_tree_next_chunk+0x116/0x2d0 [1593804.742462] [] ? radix_tree_gang_lookup+0xcd/0x150 [1593804.742484] [] ? cl2vvp_io+0x1d/0x90 [lustre] [1593804.742497] [] ? vvp_io_fini+0x34/0x6b0 [lustre] [1593804.742523] [] ? cl_io_fini+0x78/0x250 [obdclass] [1593804.742527] [] ? native_queued_spin_lock_slowpath+0x120/0x200 [1593804.742530] [] ? queued_spin_lock_slowpath+0xb/0xf [1593804.742538] [] ? _raw_spin_lock+0x20/0x30 [1593804.742554] [] ? cl_object_attr_lock+0x1a/0x20 [obdclass] [1593804.742396] INFO: rcu_sched self-detected stall on CPU { 4} (t=60001 jiffies g=21011083 c=21011082 q=288209) [1593804.743289] [] ? osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593804.743304] [] ? ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593804.743318] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593804.743333] [] ? ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593804.743349] [] ? ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593804.743364] [] ? ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593804.743367] [] ? wake_up_state+0x20/0x20 [1593804.743382] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593804.743386] [] ? kthread+0xd1/0xe0 [1593804.743388] [] ? insert_kthread_work+0x40/0x40 [1593804.743391] [] ? ret_from_fork_nospec_begin+0x7/0x21 [1593804.743393] [] ? insert_kthread_work+0x40/0x40 [1593804.743395] Task dump for CPU 18: [1593804.743396] Task dump for CPU 4: [1593804.743398] INFO: rcu_sched detected stalls on CPUs/tasks: { [1593804.743401] ldlm_bl_77 R running task 0 150644 2 0x00000088 [1593804.743402] Call Trace: [1593804.743411] [] sched_show_task+0xa8/0x110 [1593804.743413] [] dump_cpu_task+0x39/0x70 [1593804.743417] [] rcu_dump_cpu_stacks+0x90/0xd0 [1593804.743419] [] rcu_check_callbacks+0x442/0x730 [1593804.743423] [] ? tick_sched_do_timer+0x50/0x50 [1593804.743427] [] update_process_times+0x46/0x80 [1593804.743429] [] tick_sched_handle+0x30/0x70 [1593804.743430] [] tick_sched_timer+0x39/0x80 [1593804.743435] [] __hrtimer_run_queues+0xf3/0x270 [1593804.743437] [] hrtimer_interrupt+0xaf/0x1d0 [1593804.743443] [] local_apic_timer_interrupt+0x3b/0x60 [1593804.743447] [] smp_apic_timer_interrupt+0x43/0x60 [1593804.743451] [] apic_timer_interrupt+0x162/0x170 [1593804.743455] [] ? native_queued_spin_lock_slowpath+0x120/0x200 [1593804.743459] [] queued_spin_lock_slowpath+0xb/0xf [1593804.743462] [] _raw_spin_lock+0x20/0x30 [1593804.743497] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593804.743508] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593804.743538] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593804.743552] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593804.743567] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593804.743582] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593804.743596] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593804.743598] [] ? wake_up_state+0x20/0x20 [1593804.743612] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593804.743613] [] kthread+0xd1/0xe0 [1593804.743615] [] ? insert_kthread_work+0x40/0x40 [1593804.743617] [] ret_from_fork_nospec_begin+0x7/0x21 [1593804.743618] [] ? insert_kthread_work+0x40/0x40 [1593804.744134] 4 [1593804.744134] ldlm_bl_74 R running task 0 150641 2 0x00000088 [1593804.744136] Call Trace: [1593804.744138] [] sched_show_task+0xa8/0x110 [1593804.744142] [] dump_cpu_task+0x39/0x70 [1593804.744144] [] rcu_dump_cpu_stacks+0x90/0xd0 [1593804.744146] [] rcu_check_callbacks+0x442/0x730 [1593804.744149] [] ? tick_sched_do_timer+0x50/0x50 [1593804.744153] [] update_process_times+0x46/0x80 [1593804.744155] [] tick_sched_handle+0x30/0x70 [1593804.744157] [] tick_sched_timer+0x39/0x80 [1593804.744160] [] __hrtimer_run_queues+0xf3/0x270 [1593804.744163] [] hrtimer_interrupt+0xaf/0x1d0 [1593804.744168] [] local_apic_timer_interrupt+0x3b/0x60 [1593804.744170] [] smp_apic_timer_interrupt+0x43/0x60 [1593804.744173] [] apic_timer_interrupt+0x162/0x170 [1593804.744174] [] ? native_queued_spin_lock_slowpath+0x126/0x200 [1593804.744179] [] queued_spin_lock_slowpath+0xb/0xf [1593804.744181] [] _raw_spin_lock+0x20/0x30 [1593804.744197] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593804.744205] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593804.744218] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593804.744232] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593804.744247] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593804.744261] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593804.744277] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593804.744279] [] ? wake_up_state+0x20/0x20 [1593804.744294] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593804.744296] [] kthread+0xd1/0xe0 [1593804.744298] [] ? insert_kthread_work+0x40/0x40 [1593804.744301] [] ret_from_fork_nospec_begin+0x7/0x21 [1593804.744303] [] ? insert_kthread_work+0x40/0x40 [1593804.744305] Task dump for CPU 18: [1593804.744306] ldlm_bl_74 R running task 0 150641 2 0x00000088 [1593804.744309] Call Trace: [1593804.744312] [] ? __schedule+0x42a/0x860 [1593804.744316] [] ? from_kgid+0x12/0x20 [1593804.744339] [] ? get_my_ctx+0x69/0x140 [ptlrpc] [1593804.744358] [] ? sptlrpc_import_check_ctx+0x1ed/0x3b0 [ptlrpc] [1593804.744371] [] ? ldlm_lock_match_with_skip+0x3a8/0x860 [ptlrpc] [1593804.744373] [] ? call_rcu_sched+0x1d/0x20 [1593804.744377] [] ? __radix_tree_delete_node+0x4f/0x170 [1593804.744379] [] ? radix_tree_next_chunk+0x263/0x2d0 [1593804.744381] [] ? radix_tree_next_chunk+0x116/0x2d0 [1593804.744383] [] ? radix_tree_gang_lookup+0xcd/0x150 [1593804.744404] [] ? cl2vvp_io+0x1d/0x90 [lustre] [1593804.744415] [] ? vvp_io_fini+0x34/0x6b0 [lustre] [1593804.744430] [] ? cl_io_fini+0x78/0x250 [obdclass] [1593804.744433] [] ? native_queued_spin_lock_slowpath+0x122/0x200 [1593804.744435] [] ? queued_spin_lock_slowpath+0xb/0xf [1593804.744437] [] ? _raw_spin_lock+0x20/0x30 [1593804.744451] [] ? cl_object_attr_lock+0x1a/0x20 [obdclass] [1593804.744457] [] ? osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593804.744469] [] ? ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593804.744483] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593804.744496] [] ? ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593804.744510] [] ? ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593804.744524] [] ? ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593804.744526] [] ? wake_up_state+0x20/0x20 [1593804.744540] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593804.744542] [] ? kthread+0xd1/0xe0 [1593804.744543] [] ? insert_kthread_work+0x40/0x40 [1593804.744546] [] ? ret_from_fork_nospec_begin+0x7/0x21 [1593804.744547] [] ? insert_kthread_work+0x40/0x40 [1593804.744549] 18} (detected by 23, t=60003 jiffies, g=21011083, c=21011082, q=288217) [1593804.744559] Task dump for CPU 4: [1593804.744561] ldlm_bl_77 R running task 0 150644 2 0x00000088 [1593804.744563] Call Trace: [1593804.744567] [] ? __schedule+0x42a/0x860 [1593804.744570] [] ? from_kgid+0x12/0x20 [1593804.744592] [] ? get_my_ctx+0x69/0x140 [ptlrpc] [1593804.744611] [] ? sptlrpc_import_check_ctx+0x1ed/0x3b0 [ptlrpc] [1593804.744624] [] ? ldlm_lock_match_with_skip+0x3a8/0x860 [ptlrpc] [1593804.744626] [] ? call_rcu_sched+0x1d/0x20 [1593804.744628] [] ? __radix_tree_delete_node+0x4f/0x170 [1593804.744630] [] ? free_pcppages_bulk+0x17e/0x3a0 [1593804.744633] [] ? radix_tree_next_chunk+0x157/0x2d0 [1593804.744634] [] ? radix_tree_next_chunk+0x116/0x2d0 [1593804.744636] [] ? radix_tree_gang_lookup+0xcd/0x150 [1593804.744649] [] ? cl2vvp_io+0x1d/0x90 [lustre] [1593804.744660] [] ? vvp_io_fini+0x34/0x6b0 [lustre] [1593804.744676] [] ? cl_io_fini+0x78/0x250 [obdclass] [1593804.744679] [] ? native_queued_spin_lock_slowpath+0x120/0x200 [1593804.744681] [] ? queued_spin_lock_slowpath+0xb/0xf [1593804.744684] [] ? _raw_spin_lock+0x20/0x30 [1593804.744699] [] ? cl_object_attr_lock+0x1a/0x20 [obdclass] [1593804.744706] [] ? osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593804.744718] [] ? ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593804.744732] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593804.744747] [] ? ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593804.744761] [] ? ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593804.744777] [] ? ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593804.744779] [] ? wake_up_state+0x20/0x20 [1593804.744794] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593804.744796] [] ? kthread+0xd1/0xe0 [1593804.744798] [] ? insert_kthread_work+0x40/0x40 [1593804.744801] [] ? ret_from_fork_nospec_begin+0x7/0x21 [1593804.744803] [] ? insert_kthread_work+0x40/0x40 [1593804.744804] Task dump for CPU 18: [1593804.744805] ldlm_bl_74 R running task 0 150641 2 0x00000088 [1593804.744807] Call Trace: [1593804.744809] [] ? __schedule+0x42a/0x860 [1593804.744812] [] ? from_kgid+0x12/0x20 [1593804.744832] [] ? get_my_ctx+0x69/0x140 [ptlrpc] [1593804.744852] [] ? sptlrpc_import_check_ctx+0x1ed/0x3b0 [ptlrpc] [1593804.744865] [] ? ldlm_lock_match_with_skip+0x3a8/0x860 [ptlrpc] [1593804.744867] [] ? call_rcu_sched+0x1d/0x20 [1593804.744869] [] ? __radix_tree_delete_node+0x4f/0x170 [1593804.744872] [] ? radix_tree_next_chunk+0x263/0x2d0 [1593804.744874] [] ? radix_tree_next_chunk+0x116/0x2d0 [1593804.744876] [] ? radix_tree_gang_lookup+0xcd/0x150 [1593804.744888] [] ? cl2vvp_io+0x1d/0x90 [lustre] [1593804.744900] [] ? vvp_io_fini+0x34/0x6b0 [lustre] [1593804.744915] [] ? cl_io_fini+0x78/0x250 [obdclass] [1593804.744918] [] ? native_queued_spin_lock_slowpath+0x122/0x200 [1593804.744920] [] ? queued_spin_lock_slowpath+0xb/0xf [1593804.744922] [] ? _raw_spin_lock+0x20/0x30 [1593804.744937] [] ? cl_object_attr_lock+0x1a/0x20 [obdclass] [1593804.744944] [] ? osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593804.744957] [] ? ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593804.744970] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593804.744985] [] ? ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593804.745000] [] ? ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593804.745015] [] ? ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593804.745017] [] ? wake_up_state+0x20/0x20 [1593804.745032] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593804.745034] [] ? kthread+0xd1/0xe0 [1593804.745036] [] ? insert_kthread_work+0x40/0x40 [1593804.745038] [] ? ret_from_fork_nospec_begin+0x7/0x21 [1593804.745040] [] ? insert_kthread_work+0x40/0x40 [1593822.316505] LustreError: 11-0: ai400-OST0003-osc-ffff9dbf39146800: operation ldlm_enqueue to node 10.0.10.176@o2ib10 failed: rc = -107 [1593822.318080] Lustre: ai400-OST0003-osc-ffff9dbf39146800: Connection to ai400-OST0003 (at 10.0.10.176@o2ib10) was lost; in progress operations using this service will wait for recovery to complete [1593822.318328] LustreError: 167-0: ai400-OST0003-osc-ffff9dbf39146800: This client was evicted by ai400-OST0003; in progress operations using this service will fail. [1593829.163522] Lustre: ai400-OST0004-osc-ffff9dbf39146800: Connection to ai400-OST0004 (at 10.0.10.177@o2ib10) was lost; in progress operations using this service will wait for recovery to complete [1593829.163526] Lustre: Skipped 1 previous similar message [1593829.163709] LustreError: 167-0: ai400-OST0004-osc-ffff9dbf39146800: This client was evicted by ai400-OST0004; in progress operations using this service will fail. [1593829.165367] LustreError: Skipped 1 previous similar message [1593829.811468] NMI watchdog: BUG: soft lockup - CPU#4 stuck for 22s! [ldlm_bl_77:150644] [1593829.812557] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593829.812598] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593829.812639] CPU: 4 PID: 150644 Comm: ldlm_bl_77 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593829.812641] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593829.812643] task: ffff9dafe90e9040 ti: ffff9dbf327d8000 task.ti: ffff9dbf327d8000 [1593829.812645] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1593829.812654] RSP: 0000:ffff9dbf327dbc70 EFLAGS: 00000246 [1593829.812655] RAX: 0000000000000000 RBX: ffff9dad77f2f700 RCX: 0000000000210000 [1593829.812657] RDX: ffff9dbf3e05b780 RSI: 0000000000a90001 RDI: ffff9da30dea8058 [1593829.812658] RBP: ffff9dbf327dbc70 R08: ffff9daf3e71b780 R09: 0000000000000000 [1593829.812659] R10: 0000000000000000 R11: fffff9032e08ef00 R12: ffff9dbf327dbc38 [1593829.812660] R13: ffff9daf09ee2b40 R14: ffff9daf3a21fb08 R15: ffff9daf09ee2bd8 [1593829.812662] FS: 0000000000000000(0000) GS:ffff9daf3e700000(0000) knlGS:0000000000000000 [1593829.812663] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593829.812664] CR2: 00007f0a72ab8f94 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593829.812666] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593829.812667] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593829.812668] Call Trace: [1593829.812674] [] queued_spin_lock_slowpath+0xb/0xf [1593829.812680] [] _raw_spin_lock+0x20/0x30 [1593829.812718] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593829.812730] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593829.812757] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593829.812772] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593829.812788] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593829.812805] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593829.812820] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593829.812824] [] ? wake_up_state+0x20/0x20 [1593829.812840] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593829.812843] [] kthread+0xd1/0xe0 [1593829.812845] [] ? insert_kthread_work+0x40/0x40 [1593829.812848] [] ret_from_fork_nospec_begin+0x7/0x21 [1593829.812850] [] ? insert_kthread_work+0x40/0x40 [1593829.812851] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1593829.820467] NMI watchdog: BUG: soft lockup - CPU#8 stuck for 22s! [ldlm_bl_48:150612] [1593829.821268] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593829.821298] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593829.821328] CPU: 8 PID: 150612 Comm: ldlm_bl_48 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593829.821330] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593829.821331] task: ffff9dbf3823b0c0 ti: ffff9db655360000 task.ti: ffff9db655360000 [1593829.821333] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1593829.821339] RSP: 0018:ffff9db655363c70 EFLAGS: 00000246 [1593829.821340] RAX: 0000000000000000 RBX: ffff9dad77f2da40 RCX: 0000000000410000 [1593829.821341] RDX: ffff9daf3e6db780 RSI: 0000000000190001 RDI: ffff9da30dea8058 [1593829.821342] RBP: ffff9db655363c70 R08: ffff9daf3e81b780 R09: 0000000000000000 [1593829.821343] R10: 0000000000000000 R11: fffff90328b88080 R12: ffff9db655363c38 [1593829.821344] R13: ffff9dba122e5680 R14: ffff9dbdad264b48 R15: ffff9dba122e5718 [1593829.821346] FS: 0000000000000000(0000) GS:ffff9daf3e800000(0000) knlGS:0000000000000000 [1593829.821347] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593829.821348] CR2: 000000c000d11200 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593829.821349] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593829.821350] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593829.821351] Call Trace: [1593829.821354] [] queued_spin_lock_slowpath+0xb/0xf [1593829.821358] [] _raw_spin_lock+0x20/0x30 [1593829.821380] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593829.821390] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593829.821414] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593829.821428] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593829.821443] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593829.821459] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593829.821474] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593829.821477] [] ? wake_up_state+0x20/0x20 [1593829.821492] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593829.821495] [] kthread+0xd1/0xe0 [1593829.821497] [] ? insert_kthread_work+0x40/0x40 [1593829.821499] [] ret_from_fork_nospec_begin+0x7/0x21 [1593829.821501] [] ? insert_kthread_work+0x40/0x40 [1593829.821502] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1593829.906468] NMI watchdog: BUG: soft lockup - CPU#18 stuck for 22s! [ldlm_bl_74:150641] [1593829.906499] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593829.906531] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593829.906533] CPU: 18 PID: 150641 Comm: ldlm_bl_74 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593829.906534] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593829.906535] task: ffff9dbf38238000 ti: ffff9db7d221c000 task.ti: ffff9db7d221c000 [1593829.906543] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1593829.906543] RSP: 0018:ffff9db7d221fc70 EFLAGS: 00000246 [1593829.906544] RAX: 0000000000000000 RBX: ffff9daec4dcc8c0 RCX: 0000000000910000 [1593829.906545] RDX: ffff9dbf3e09b780 RSI: 0000000000b10001 RDI: ffff9da30dea8058 [1593829.906545] RBP: ffff9db7d221fc70 R08: ffff9dbf3df9b780 R09: 0000000000000000 [1593829.906546] R10: 0000000000000000 R11: fffff903232bbb00 R12: ffff9db7d221fc38 [1593829.906546] R13: ffff9dbefa242070 R14: ffff9dbf3a7c15f8 R15: ffff9dbefa242108 [1593829.906547] FS: 0000000000000000(0000) GS:ffff9dbf3df80000(0000) knlGS:0000000000000000 [1593829.906548] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593829.906548] CR2: 00007f57c0025008 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593829.906549] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593829.906550] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593829.906550] Call Trace: [1593829.906555] [] queued_spin_lock_slowpath+0xb/0xf [1593829.906559] [] _raw_spin_lock+0x20/0x30 [1593829.906588] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593829.906599] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593829.906623] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593829.906636] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593829.906651] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593829.906665] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593829.906679] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593829.906682] [] ? wake_up_state+0x20/0x20 [1593829.906695] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593829.906697] [] kthread+0xd1/0xe0 [1593829.906698] [] ? insert_kthread_work+0x40/0x40 [1593829.906701] [] ret_from_fork_nospec_begin+0x7/0x21 [1593829.906702] [] ? insert_kthread_work+0x40/0x40 [1593829.906715] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1593837.911543] NMI watchdog: BUG: soft lockup - CPU#20 stuck for 23s! [ldlm_bl_43:150605] [1593837.912699] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593837.912739] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593837.912781] CPU: 20 PID: 150605 Comm: ldlm_bl_43 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593837.912783] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593837.912785] task: ffff9dbb883f5140 ti: ffff9dbb4467c000 task.ti: ffff9dbb4467c000 [1593837.912786] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1593837.912794] RSP: 0018:ffff9dbb4467fc70 EFLAGS: 00000246 [1593837.912795] RAX: 0000000000000000 RBX: ffff9dab6c8fe440 RCX: 0000000000a10000 [1593837.912796] RDX: ffff9dbf3de5b780 RSI: 0000000000690001 RDI: ffff9da30dea8058 [1593837.912797] RBP: ffff9dbb4467fc70 R08: ffff9dbf3e01b780 R09: 0000000000000000 [1593837.912798] R10: 0000000000000000 R11: fffff9031ec2c300 R12: ffff9dbb4467fc38 [1593837.912799] R13: ffff9dbe8baf8ad0 R14: ffff9dba6f2ca758 R15: ffff9dbe8baf8b68 [1593837.912801] FS: 0000000000000000(0000) GS:ffff9dbf3e000000(0000) knlGS:0000000000000000 [1593837.912802] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593837.912803] CR2: 000000c000de5000 CR3: 000000207f848000 CR4: 00000000003607e0 [1593837.912804] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593837.912805] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593837.912806] Call Trace: [1593837.912812] [] queued_spin_lock_slowpath+0xb/0xf [1593837.912817] [] _raw_spin_lock+0x20/0x30 [1593837.912847] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593837.912858] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593837.912883] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593837.912897] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593837.912913] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593837.912927] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593837.912942] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593837.912945] [] ? wake_up_state+0x20/0x20 [1593837.912959] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593837.912961] [] kthread+0xd1/0xe0 [1593837.912963] [] ? insert_kthread_work+0x40/0x40 [1593837.912966] [] ret_from_fork_nospec_begin+0x7/0x21 [1593837.912968] [] ? insert_kthread_work+0x40/0x40 [1593837.912969] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1593849.893654] NMI watchdog: BUG: soft lockup - CPU#13 stuck for 22s! [ldlm_bl_28:148141] [1593849.894809] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593849.894850] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593849.894892] CPU: 13 PID: 148141 Comm: ldlm_bl_28 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593849.894893] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593849.894895] task: ffff9daf17be30c0 ti: ffff9dac518c4000 task.ti: ffff9dac518c4000 [1593849.894897] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1593849.894905] RSP: 0018:ffff9dac518c7c70 EFLAGS: 00000246 [1593849.894906] RAX: 0000000000000000 RBX: ffff9dad77f2d400 RCX: 0000000000690000 [1593849.894907] RDX: ffff9dbf3e09b780 RSI: 0000000000b10001 RDI: ffff9da30dea8058 [1593849.894908] RBP: ffff9dac518c7c70 R08: ffff9dbf3de5b780 R09: 0000000000000000 [1593849.894910] R10: 0000000000000000 R11: fffff90304ddaa00 R12: ffff9dac518c7c38 [1593849.894911] R13: ffff9db645af2070 R14: ffff9dba6ff5bb08 R15: ffff9db645af2108 [1593849.894912] FS: 0000000000000000(0000) GS:ffff9dbf3de40000(0000) knlGS:0000000000000000 [1593849.894914] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593849.894915] CR2: 00007fad55587000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593849.894916] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593849.894918] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593849.894919] Call Trace: [1593849.894925] [] queued_spin_lock_slowpath+0xb/0xf [1593849.894930] [] _raw_spin_lock+0x20/0x30 [1593849.894961] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593849.894972] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593849.894999] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593849.895013] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593849.895029] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593849.895044] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593849.895060] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593849.895063] [] ? wake_up_state+0x20/0x20 [1593849.895079] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593849.895082] [] kthread+0xd1/0xe0 [1593849.895085] [] ? insert_kthread_work+0x40/0x40 [1593849.895089] [] ret_from_fork_nospec_begin+0x7/0x21 [1593849.895091] [] ? insert_kthread_work+0x40/0x40 [1593849.895092] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1593857.811727] NMI watchdog: BUG: soft lockup - CPU#4 stuck for 22s! [ldlm_bl_77:150644] [1593857.812668] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593857.812708] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593857.812748] CPU: 4 PID: 150644 Comm: ldlm_bl_77 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593857.812750] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593857.812752] task: ffff9dafe90e9040 ti: ffff9dbf327d8000 task.ti: ffff9dbf327d8000 [1593857.812753] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1593857.812762] RSP: 0000:ffff9dbf327dbc70 EFLAGS: 00000246 [1593857.812764] RAX: 0000000000000000 RBX: ffff9dab6c8ff480 RCX: 0000000000210000 [1593857.812765] RDX: ffff9dbf3e0db780 RSI: 0000000000b90001 RDI: ffff9da30dea8058 [1593857.812766] RBP: ffff9dbf327dbc70 R08: ffff9daf3e71b780 R09: 0000000000000000 [1593857.812767] R10: 0000000000000000 R11: fffff90328b8e780 R12: ffff9dbf327dbc38 [1593857.812768] R13: ffff9daf09ee2b40 R14: ffff9daf3a21fb08 R15: ffff9daf09ee2bd8 [1593857.812769] FS: 0000000000000000(0000) GS:ffff9daf3e700000(0000) knlGS:0000000000000000 [1593857.812771] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593857.812772] CR2: 00007f0a72ab8f94 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593857.812773] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593857.812774] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593857.812775] Call Trace: [1593857.812782] [] queued_spin_lock_slowpath+0xb/0xf [1593857.812787] [] _raw_spin_lock+0x20/0x30 [1593857.812824] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593857.812835] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593857.812865] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593857.812879] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593857.812895] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593857.812909] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593857.812924] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593857.812928] [] ? wake_up_state+0x20/0x20 [1593857.812942] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593857.812945] [] kthread+0xd1/0xe0 [1593857.812947] [] ? insert_kthread_work+0x40/0x40 [1593857.812950] [] ret_from_fork_nospec_begin+0x7/0x21 [1593857.812951] [] ? insert_kthread_work+0x40/0x40 [1593857.812952] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1593857.820725] NMI watchdog: BUG: soft lockup - CPU#8 stuck for 22s! [ldlm_bl_48:150612] [1593857.821510] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593857.821538] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593857.821564] CPU: 8 PID: 150612 Comm: ldlm_bl_48 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593857.821566] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593857.821568] task: ffff9dbf3823b0c0 ti: ffff9db655360000 task.ti: ffff9db655360000 [1593857.821569] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1593857.821573] RSP: 0018:ffff9db655363c70 EFLAGS: 00000246 [1593857.821574] RAX: 0000000000000000 RBX: ffff9daf13228c80 RCX: 0000000000410000 [1593857.821576] RDX: ffff9daf3e69b780 RSI: 0000000000110001 RDI: ffff9da30dea8058 [1593857.821577] RBP: ffff9db655363c70 R08: ffff9daf3e81b780 R09: 0000000000000000 [1593857.821578] R10: 0000000000000000 R11: fffff902d399d900 R12: ffff9db655363c38 [1593857.821579] R13: ffff9dba122e5680 R14: ffff9dbdad264b48 R15: ffff9dba122e5718 [1593857.821580] FS: 0000000000000000(0000) GS:ffff9daf3e800000(0000) knlGS:0000000000000000 [1593857.821581] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593857.821582] CR2: 000000c000d11200 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593857.821584] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593857.821585] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593857.821585] Call Trace: [1593857.821588] [] queued_spin_lock_slowpath+0xb/0xf [1593857.821591] [] _raw_spin_lock+0x20/0x30 [1593857.821610] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593857.821619] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593857.821635] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593857.821649] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593857.821663] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593857.821677] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593857.821691] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593857.821694] [] ? wake_up_state+0x20/0x20 [1593857.821708] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593857.821710] [] kthread+0xd1/0xe0 [1593857.821712] [] ? insert_kthread_work+0x40/0x40 [1593857.821715] [] ret_from_fork_nospec_begin+0x7/0x21 [1593857.821716] [] ? insert_kthread_work+0x40/0x40 [1593857.821717] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1593857.906728] NMI watchdog: BUG: soft lockup - CPU#18 stuck for 22s! [ldlm_bl_74:150641] [1593857.906761] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593857.906794] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593857.906796] CPU: 18 PID: 150641 Comm: ldlm_bl_74 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593857.906797] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593857.906798] task: ffff9dbf38238000 ti: ffff9db7d221c000 task.ti: ffff9db7d221c000 [1593857.906806] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x120/0x200 [1593857.906806] RSP: 0018:ffff9db7d221fc70 EFLAGS: 00000246 [1593857.906807] RAX: 0000000000000000 RBX: ffff9daec4dcf0c0 RCX: 0000000000910000 [1593857.906808] RDX: ffff9daf3e75b780 RSI: 0000000000290001 RDI: ffff9da30dea8058 [1593857.906808] RBP: ffff9db7d221fc70 R08: ffff9dbf3df9b780 R09: 0000000000000000 [1593857.906809] R10: 0000000000000000 R11: fffff9033c66b980 R12: ffff9db7d221fc38 [1593857.906809] R13: ffff9dbefa242070 R14: ffff9dbf3a7c15f8 R15: ffff9dbefa242108 [1593857.906810] FS: 0000000000000000(0000) GS:ffff9dbf3df80000(0000) knlGS:0000000000000000 [1593857.906811] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593857.906811] CR2: 00007f57c0025008 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593857.906812] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593857.906813] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593857.906813] Call Trace: [1593857.906818] [] queued_spin_lock_slowpath+0xb/0xf [1593857.906822] [] _raw_spin_lock+0x20/0x30 [1593857.906851] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593857.906861] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593857.906886] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593857.906900] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593857.906915] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593857.906930] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593857.906945] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593857.906948] [] ? wake_up_state+0x20/0x20 [1593857.906963] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593857.906966] [] kthread+0xd1/0xe0 [1593857.906967] [] ? insert_kthread_work+0x40/0x40 [1593857.906971] [] ret_from_fork_nospec_begin+0x7/0x21 [1593857.906972] [] ? insert_kthread_work+0x40/0x40 [1593857.906985] Code: c1 e8 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 90 41 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 [1593860.562055] Lustre: ai400-OST0006-osc-ffff9dbf39146800: Connection to ai400-OST0006 (at 10.0.10.178@o2ib10) was lost; in progress operations using this service will wait for recovery to complete [1593860.562223] LustreError: 167-0: ai400-OST0006-osc-ffff9dbf39146800: This client was evicted by ai400-OST0006; in progress operations using this service will fail. [1593865.815802] NMI watchdog: BUG: soft lockup - CPU#6 stuck for 23s! [ldlm_bl_115:150683] [1593865.816785] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593865.816824] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593865.816863] CPU: 6 PID: 150683 Comm: ldlm_bl_115 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593865.816865] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593865.816867] task: ffff9dbf390630c0 ti: ffff9dba86fd8000 task.ti: ffff9dba86fd8000 [1593865.816868] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x156/0x200 [1593865.816878] RSP: 0018:ffff9dba86fdbc70 EFLAGS: 00000202 [1593865.816880] RAX: 0000000000000001 RBX: ffff9daec4dce1c0 RCX: 0000000000310000 [1593865.816881] RDX: 0000000000410001 RSI: 0000000000910001 RDI: ffff9da30dea8058 [1593865.816882] RBP: ffff9dba86fdbc70 R08: ffff9daf3e79b780 R09: ffff9dbf3df1b780 [1593865.816883] R10: 0000000000000000 R11: fffff9031f63ef00 R12: ffff9dba86fdbc38 [1593865.816884] R13: ffff9db748a1e150 R14: ffff9db7d3ed4008 R15: ffff9db748a1e1e8 [1593865.816885] FS: 0000000000000000(0000) GS:ffff9daf3e780000(0000) knlGS:0000000000000000 [1593865.816887] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593865.816888] CR2: 0000000001b30658 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593865.816889] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593865.816890] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593865.816891] Call Trace: [1593865.816898] [] queued_spin_lock_slowpath+0xb/0xf [1593865.816903] [] _raw_spin_lock+0x20/0x30 [1593865.816942] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593865.816954] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593865.816981] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593865.816995] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593865.817011] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593865.817026] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593865.817040] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593865.817044] [] ? wake_up_state+0x20/0x20 [1593865.817058] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593865.817061] [] kthread+0xd1/0xe0 [1593865.817063] [] ? insert_kthread_work+0x40/0x40 [1593865.817066] [] ret_from_fork_nospec_begin+0x7/0x21 [1593865.817067] [] ? insert_kthread_work+0x40/0x40 [1593865.817068] Code: 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 85 c0 74 21 83 f8 03 75 10 eb 1a 66 2e 0f 1f 84 00 00 00 00 00 85 c0 74 0c f3 90 <8b> 17 0f b7 c2 83 f8 03 75 f0 be 01 00 00 00 eb 15 66 0f 1f 84 [1593865.911802] NMI watchdog: BUG: soft lockup - CPU#20 stuck for 23s! [ldlm_bl_43:150605] [1593865.911836] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593865.911870] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593865.911873] CPU: 20 PID: 150605 Comm: ldlm_bl_43 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593865.911873] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593865.911874] task: ffff9dbb883f5140 ti: ffff9dbb4467c000 task.ti: ffff9dbb4467c000 [1593865.911882] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1593865.911882] RSP: 0018:ffff9dbb4467fc70 EFLAGS: 00000246 [1593865.911883] RAX: 0000000000000000 RBX: ffff9dab6c8fda40 RCX: 0000000000a10000 [1593865.911883] RDX: ffff9daf3e8db780 RSI: 0000000000590001 RDI: ffff9da30dea8058 [1593865.911884] RBP: ffff9dbb4467fc70 R08: ffff9dbf3e01b780 R09: 0000000000000000 [1593865.911884] R10: 0000000000000000 R11: fffff902f828b600 R12: ffff9dbb4467fc38 [1593865.911885] R13: ffff9dbe8baf8ad0 R14: ffff9dba6f2ca758 R15: ffff9dbe8baf8b68 [1593865.911886] FS: 0000000000000000(0000) GS:ffff9dbf3e000000(0000) knlGS:0000000000000000 [1593865.911887] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593865.911887] CR2: 000000c000de5000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593865.911888] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593865.911888] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593865.911889] Call Trace: [1593865.911894] [] queued_spin_lock_slowpath+0xb/0xf [1593865.911898] [] _raw_spin_lock+0x20/0x30 [1593865.911930] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593865.911940] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593865.911963] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593865.911965] [] ? native_queued_spin_lock_slowpath+0x1d3/0x200 [1593865.911980] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593865.911995] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593865.912010] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593865.912014] [] ? wake_up_state+0x20/0x20 [1593865.912028] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593865.912031] [] kthread+0xd1/0xe0 [1593865.912033] [] ? insert_kthread_work+0x40/0x40 [1593865.912035] [] ret_from_fork_nospec_begin+0x7/0x21 [1593865.912037] [] ? insert_kthread_work+0x40/0x40 [1593865.912050] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1593877.893913] NMI watchdog: BUG: soft lockup - CPU#13 stuck for 22s! [ldlm_bl_28:148141] [1593877.895143] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593877.895184] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593877.895225] CPU: 13 PID: 148141 Comm: ldlm_bl_28 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593877.895227] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593877.895229] task: ffff9daf17be30c0 ti: ffff9dac518c4000 task.ti: ffff9dac518c4000 [1593877.895230] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1593877.895238] RSP: 0018:ffff9dac518c7c70 EFLAGS: 00000246 [1593877.895240] RAX: 0000000000000000 RBX: ffff9dad77f2de00 RCX: 0000000000690000 [1593877.895241] RDX: ffff9dbf3e05b780 RSI: 0000000000a90001 RDI: ffff9da30dea8058 [1593877.895242] RBP: ffff9dac518c7c70 R08: ffff9dbf3de5b780 R09: 0000000000000000 [1593877.895243] R10: 0000000000000000 R11: fffff902c7448280 R12: ffff9dac518c7c38 [1593877.895244] R13: ffff9db645af2070 R14: ffff9dba6ff5bb08 R15: ffff9db645af2108 [1593877.895246] FS: 0000000000000000(0000) GS:ffff9dbf3de40000(0000) knlGS:0000000000000000 [1593877.895247] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593877.895248] CR2: 00007fad55587000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593877.895250] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593877.895251] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593877.895252] Call Trace: [1593877.895258] [] queued_spin_lock_slowpath+0xb/0xf [1593877.895262] [] _raw_spin_lock+0x20/0x30 [1593877.895295] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593877.895305] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593877.895329] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593877.895344] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593877.895360] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593877.895376] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593877.895391] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593877.895394] [] ? wake_up_state+0x20/0x20 [1593877.895410] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593877.895413] [] kthread+0xd1/0xe0 [1593877.895415] [] ? insert_kthread_work+0x40/0x40 [1593877.895418] [] ret_from_fork_nospec_begin+0x7/0x21 [1593877.895420] [] ? insert_kthread_work+0x40/0x40 [1593877.895421] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1593885.811986] NMI watchdog: BUG: soft lockup - CPU#4 stuck for 22s! [ldlm_bl_77:150644] [1593885.813152] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593885.813193] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593885.813234] CPU: 4 PID: 150644 Comm: ldlm_bl_77 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593885.813235] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593885.813237] task: ffff9dafe90e9040 ti: ffff9dbf327d8000 task.ti: ffff9dbf327d8000 [1593885.813238] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1593885.813248] RSP: 0000:ffff9dbf327dbc70 EFLAGS: 00000246 [1593885.813249] RAX: 0000000000000000 RBX: ffff9daf13228140 RCX: 0000000000210000 [1593885.813250] RDX: ffff9daf3e81b780 RSI: 0000000000410001 RDI: ffff9da30dea8058 [1593885.813251] RBP: ffff9dbf327dbc70 R08: ffff9daf3e71b780 R09: 0000000000000000 [1593885.813253] R10: 0000000000000000 R11: fffff902f1b20900 R12: ffff9dbf327dbc38 [1593885.813254] R13: ffff9daf09ee2b40 R14: ffff9daf3a21fb08 R15: ffff9daf09ee2bd8 [1593885.813255] FS: 0000000000000000(0000) GS:ffff9daf3e700000(0000) knlGS:0000000000000000 [1593885.813257] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593885.813258] CR2: 00007f0a72ab8f94 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593885.813260] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593885.813261] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593885.813262] Call Trace: [1593885.813268] [] queued_spin_lock_slowpath+0xb/0xf [1593885.813274] [] _raw_spin_lock+0x20/0x30 [1593885.813309] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593885.813322] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593885.813352] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593885.813366] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593885.813382] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593885.813398] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593885.813414] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593885.813418] [] ? wake_up_state+0x20/0x20 [1593885.813433] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593885.813436] [] kthread+0xd1/0xe0 [1593885.813438] [] ? insert_kthread_work+0x40/0x40 [1593885.813442] [] ret_from_fork_nospec_begin+0x7/0x21 [1593885.813444] [] ? insert_kthread_work+0x40/0x40 [1593885.813445] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1593885.820985] NMI watchdog: BUG: soft lockup - CPU#8 stuck for 22s! [ldlm_bl_48:150612] [1593885.821857] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593885.821886] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593885.821914] CPU: 8 PID: 150612 Comm: ldlm_bl_48 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593885.821915] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593885.821917] task: ffff9dbf3823b0c0 ti: ffff9db655360000 task.ti: ffff9db655360000 [1593885.821918] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1593885.821923] RSP: 0018:ffff9db655363c70 EFLAGS: 00000246 [1593885.821924] RAX: 0000000000000000 RBX: ffff9dab6c8fd7c0 RCX: 0000000000410000 [1593885.821925] RDX: ffff9daf3e71b780 RSI: 0000000000210001 RDI: ffff9da30dea8058 [1593885.821926] RBP: ffff9db655363c70 R08: ffff9daf3e81b780 R09: 0000000000000000 [1593885.821927] R10: 0000000000000000 R11: fffff903393e9a80 R12: ffff9db655363c38 [1593885.821928] R13: ffff9dba122e5680 R14: ffff9dbdad264b48 R15: ffff9dba122e5718 [1593885.821930] FS: 0000000000000000(0000) GS:ffff9daf3e800000(0000) knlGS:0000000000000000 [1593885.821931] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593885.821932] CR2: 000000c000d11200 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593885.821933] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593885.821934] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593885.821935] Call Trace: [1593885.821938] [] queued_spin_lock_slowpath+0xb/0xf [1593885.821942] [] _raw_spin_lock+0x20/0x30 [1593885.821963] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593885.821973] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593885.821995] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593885.822010] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593885.822025] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593885.822040] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593885.822055] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593885.822058] [] ? wake_up_state+0x20/0x20 [1593885.822074] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593885.822076] [] kthread+0xd1/0xe0 [1593885.822078] [] ? insert_kthread_work+0x40/0x40 [1593885.822081] [] ret_from_fork_nospec_begin+0x7/0x21 [1593885.822083] [] ? insert_kthread_work+0x40/0x40 [1593885.822084] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1593885.906987] NMI watchdog: BUG: soft lockup - CPU#18 stuck for 22s! [ldlm_bl_74:150641] [1593885.907020] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593885.907053] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593885.907056] CPU: 18 PID: 150641 Comm: ldlm_bl_74 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593885.907056] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593885.907057] task: ffff9dbf38238000 ti: ffff9db7d221c000 task.ti: ffff9db7d221c000 [1593885.907065] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1593885.907065] RSP: 0018:ffff9db7d221fc70 EFLAGS: 00000246 [1593885.907066] RAX: 0000000000000000 RBX: ffff9daec4dcd2c0 RCX: 0000000000910000 [1593885.907067] RDX: ffff9daf3e69b780 RSI: 0000000000110001 RDI: ffff9da30dea8058 [1593885.907067] RBP: ffff9db7d221fc70 R08: ffff9dbf3df9b780 R09: 0000000000000000 [1593885.907068] R10: 0000000000000000 R11: fffff902e5affd80 R12: ffff9db7d221fc38 [1593885.907068] R13: ffff9dbefa242070 R14: ffff9dbf3a7c15f8 R15: ffff9dbefa242108 [1593885.907069] FS: 0000000000000000(0000) GS:ffff9dbf3df80000(0000) knlGS:0000000000000000 [1593885.907070] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593885.907070] CR2: 00007f57c0025008 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593885.907071] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593885.907071] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593885.907072] Call Trace: [1593885.907076] [] queued_spin_lock_slowpath+0xb/0xf [1593885.907081] [] _raw_spin_lock+0x20/0x30 [1593885.907110] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593885.907120] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593885.907143] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593885.907158] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593885.907172] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593885.907187] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593885.907202] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593885.907205] [] ? wake_up_state+0x20/0x20 [1593885.907220] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593885.907222] [] kthread+0xd1/0xe0 [1593885.907223] [] ? insert_kthread_work+0x40/0x40 [1593885.907226] [] ret_from_fork_nospec_begin+0x7/0x21 [1593885.907227] [] ? insert_kthread_work+0x40/0x40 [1593885.907240] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1593893.816054] NMI watchdog: BUG: soft lockup - CPU#6 stuck for 23s! [ldlm_bl_115:150683] [1593893.817047] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593893.817087] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593893.817128] CPU: 6 PID: 150683 Comm: ldlm_bl_115 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593893.817130] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593893.817131] task: ffff9dbf390630c0 ti: ffff9dba86fd8000 task.ti: ffff9dba86fd8000 [1593893.817133] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1593893.817143] RSP: 0018:ffff9dba86fdbc70 EFLAGS: 00000246 [1593893.817144] RAX: 0000000000000000 RBX: ffff9dad77f2f480 RCX: 0000000000310000 [1593893.817145] RDX: ffff9daf3e6db780 RSI: 0000000000190001 RDI: ffff9da30dea8058 [1593893.817146] RBP: ffff9dba86fdbc70 R08: ffff9daf3e79b780 R09: 0000000000000000 [1593893.817147] R10: 0000000000000000 R11: fffff90300dfb600 R12: ffff9dba86fdbc38 [1593893.817148] R13: ffff9db748a1e150 R14: ffff9db7d3ed4008 R15: ffff9db748a1e1e8 [1593893.817150] FS: 0000000000000000(0000) GS:ffff9daf3e780000(0000) knlGS:0000000000000000 [1593893.817151] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593893.817152] CR2: 0000000001b30658 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593893.817153] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593893.817154] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593893.817155] Call Trace: [1593893.817162] [] queued_spin_lock_slowpath+0xb/0xf [1593893.817168] [] _raw_spin_lock+0x20/0x30 [1593893.817203] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593893.817215] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593893.817243] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593893.817258] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593893.817274] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593893.817289] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593893.817304] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593893.817307] [] ? wake_up_state+0x20/0x20 [1593893.817321] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593893.817324] [] kthread+0xd1/0xe0 [1593893.817326] [] ? insert_kthread_work+0x40/0x40 [1593893.817329] [] ret_from_fork_nospec_begin+0x7/0x21 [1593893.817331] [] ? insert_kthread_work+0x40/0x40 [1593893.817332] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1593893.912054] NMI watchdog: BUG: soft lockup - CPU#20 stuck for 22s! [ldlm_bl_43:150605] [1593893.912087] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593893.912120] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593893.912123] CPU: 20 PID: 150605 Comm: ldlm_bl_43 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593893.912123] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593893.912124] task: ffff9dbb883f5140 ti: ffff9dbb4467c000 task.ti: ffff9dbb4467c000 [1593893.912132] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x158/0x200 [1593893.912133] RSP: 0018:ffff9dbb4467fc70 EFLAGS: 00000202 [1593893.912133] RAX: 0000000000000001 RBX: ffff9daec4dcfc00 RCX: 0000000000a10000 [1593893.912134] RDX: 0000000000b10001 RSI: 0000000000410001 RDI: ffff9da30dea8058 [1593893.912134] RBP: ffff9dbb4467fc70 R08: ffff9dbf3e01b780 R09: ffff9dbf3e0db780 [1593893.912135] R10: 0000000000000000 R11: fffff902e6bfd980 R12: ffff9dbb4467fc38 [1593893.912135] R13: ffff9dbe8baf8ad0 R14: ffff9dba6f2ca758 R15: ffff9dbe8baf8b68 [1593893.912136] FS: 0000000000000000(0000) GS:ffff9dbf3e000000(0000) knlGS:0000000000000000 [1593893.912137] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593893.912138] CR2: 000000c000de5000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593893.912138] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593893.912139] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593893.912139] Call Trace: [1593893.912144] [] queued_spin_lock_slowpath+0xb/0xf [1593893.912148] [] _raw_spin_lock+0x20/0x30 [1593893.912179] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593893.912190] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593893.912213] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593893.912226] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593893.912242] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593893.912257] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593893.912272] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593893.912275] [] ? wake_up_state+0x20/0x20 [1593893.912289] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593893.912291] [] kthread+0xd1/0xe0 [1593893.912293] [] ? insert_kthread_work+0x40/0x40 [1593893.912295] [] ret_from_fork_nospec_begin+0x7/0x21 [1593893.912297] [] ? insert_kthread_work+0x40/0x40 [1593893.912310] Code: 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 85 c0 74 21 83 f8 03 75 10 eb 1a 66 2e 0f 1f 84 00 00 00 00 00 85 c0 74 0c f3 90 8b 17 <0f> b7 c2 83 f8 03 75 f0 be 01 00 00 00 eb 15 66 0f 1f 84 00 00 [1593905.894145] NMI watchdog: BUG: soft lockup - CPU#13 stuck for 22s! [ldlm_bl_28:148141] [1593905.895388] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593905.895430] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593905.895471] CPU: 13 PID: 148141 Comm: ldlm_bl_28 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593905.895473] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593905.895475] task: ffff9daf17be30c0 ti: ffff9dac518c4000 task.ti: ffff9dac518c4000 [1593905.895476] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1593905.895484] RSP: 0018:ffff9dac518c7c70 EFLAGS: 00000246 [1593905.895485] RAX: 0000000000000000 RBX: ffff9daf1bada1c0 RCX: 0000000000690000 [1593905.895486] RDX: ffff9dbf3de1b780 RSI: 0000000000610001 RDI: ffff9da30dea8058 [1593905.895487] RBP: ffff9dac518c7c70 R08: ffff9dbf3de5b780 R09: 0000000000000000 [1593905.895488] R10: 0000000000000000 R11: fffff9032f8bad00 R12: ffff9dac518c7c38 [1593905.895490] R13: ffff9db645af2070 R14: ffff9dba6ff5bb08 R15: ffff9db645af2108 [1593905.895491] FS: 0000000000000000(0000) GS:ffff9dbf3de40000(0000) knlGS:0000000000000000 [1593905.895492] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593905.895493] CR2: 00007fad55587000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593905.895495] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593905.895496] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593905.895497] Call Trace: [1593905.895503] [] queued_spin_lock_slowpath+0xb/0xf [1593905.895507] [] _raw_spin_lock+0x20/0x30 [1593905.895539] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593905.895550] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593905.895574] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593905.895588] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593905.895604] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593905.895619] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593905.895635] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593905.895638] [] ? wake_up_state+0x20/0x20 [1593905.895654] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593905.895657] [] kthread+0xd1/0xe0 [1593905.895659] [] ? insert_kthread_work+0x40/0x40 [1593905.895662] [] ret_from_fork_nospec_begin+0x7/0x21 [1593905.895664] [] ? insert_kthread_work+0x40/0x40 [1593905.895665] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1593909.807176] NMI watchdog: BUG: soft lockup - CPU#2 stuck for 23s! [ldlm_bl_111:150679] [1593909.808352] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593909.808393] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593909.808434] CPU: 2 PID: 150679 Comm: ldlm_bl_111 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593909.808436] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593909.808438] task: ffff9daf3e33b0c0 ti: ffff9dab84bdc000 task.ti: ffff9dab84bdc000 [1593909.808439] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1593909.808449] RSP: 0018:ffff9dab84bdfc70 EFLAGS: 00000246 [1593909.808450] RAX: 0000000000000000 RBX: ffff9daf13229e00 RCX: 0000000000110000 [1593909.808452] RDX: ffff9daf3e6db780 RSI: 0000000000190001 RDI: ffff9da30dea8058 [1593909.808453] RBP: ffff9dab84bdfc70 R08: ffff9daf3e69b780 R09: 0000000000000000 [1593909.808454] R10: 0000000000000000 R11: fffff90333fd7200 R12: ffff9dab84bdfc38 [1593909.808455] R13: ffff9dae51a53610 R14: ffff9daf38ccad88 R15: ffff9dae51a536a8 [1593909.808457] FS: 0000000000000000(0000) GS:ffff9daf3e680000(0000) knlGS:0000000000000000 [1593909.808458] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593909.808459] CR2: 00007f736a4d09e0 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593909.808461] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593909.808462] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593909.808464] Call Trace: [1593909.808471] [] queued_spin_lock_slowpath+0xb/0xf [1593909.808476] [] _raw_spin_lock+0x20/0x30 [1593909.808515] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593909.808528] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593909.808555] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593909.808569] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593909.808585] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593909.808601] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593909.808617] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593909.808621] [] ? wake_up_state+0x20/0x20 [1593909.808636] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593909.808639] [] kthread+0xd1/0xe0 [1593909.808641] [] ? insert_kthread_work+0x40/0x40 [1593909.808645] [] ret_from_fork_nospec_begin+0x7/0x21 [1593909.808647] [] ? insert_kthread_work+0x40/0x40 [1593909.808648] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1593909.919177] NMI watchdog: BUG: soft lockup - CPU#23 stuck for 23s! [ldlm_bl_35:150593] [1593909.919210] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593909.919243] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593909.919245] CPU: 23 PID: 150593 Comm: ldlm_bl_35 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593909.919246] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593909.919247] task: ffff9dbf37c96180 ti: ffff9dba2ce94000 task.ti: ffff9dba2ce94000 [1593909.919255] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1593909.919256] RSP: 0018:ffff9dba2ce97c70 EFLAGS: 00000246 [1593909.919256] RAX: 0000000000000000 RBX: ffff9dab6c8fc8c0 RCX: 0000000000b90000 [1593909.919257] RDX: ffff9dbf3de5b780 RSI: 0000000000690001 RDI: ffff9da30dea8058 [1593909.919257] RBP: ffff9dba2ce97c70 R08: ffff9dbf3e0db780 R09: 0000000000000000 [1593909.919258] R10: 0000000000000000 R11: fffff9031729a980 R12: ffff9dba2ce97c38 [1593909.919259] R13: ffff9da901416150 R14: ffff9dbcbc3426c8 R15: ffff9da9014161e8 [1593909.919260] FS: 0000000000000000(0000) GS:ffff9dbf3e0c0000(0000) knlGS:0000000000000000 [1593909.919260] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593909.919261] CR2: 0000000001a81000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593909.919262] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593909.919262] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593909.919263] Call Trace: [1593909.919268] [] queued_spin_lock_slowpath+0xb/0xf [1593909.919272] [] _raw_spin_lock+0x20/0x30 [1593909.919302] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593909.919312] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593909.919335] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593909.919349] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593909.919364] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593909.919379] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593909.919394] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593909.919396] [] ? wake_up_state+0x20/0x20 [1593909.919411] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593909.919414] [] kthread+0xd1/0xe0 [1593909.919415] [] ? insert_kthread_work+0x40/0x40 [1593909.919418] [] ret_from_fork_nospec_begin+0x7/0x21 [1593909.919419] [] ? insert_kthread_work+0x40/0x40 [1593909.919432] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1593913.812206] NMI watchdog: BUG: soft lockup - CPU#4 stuck for 22s! [ldlm_bl_77:150644] [1593913.813329] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593913.813370] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593913.813410] CPU: 4 PID: 150644 Comm: ldlm_bl_77 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593913.813412] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593913.813413] task: ffff9dafe90e9040 ti: ffff9dbf327d8000 task.ti: ffff9dbf327d8000 [1593913.813415] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1593913.813424] RSP: 0000:ffff9dbf327dbc70 EFLAGS: 00000246 [1593913.813426] RAX: 0000000000000000 RBX: ffff9dad77f2d900 RCX: 0000000000210000 [1593913.813427] RDX: ffff9dbf3df1b780 RSI: 0000000000810001 RDI: ffff9da30dea8058 [1593913.813428] RBP: ffff9dbf327dbc70 R08: ffff9daf3e71b780 R09: 0000000000000000 [1593913.813429] R10: 0000000000000000 R11: fffff902fc6d5c00 R12: ffff9dbf327dbc38 [1593913.813431] R13: ffff9daf09ee2b40 R14: ffff9daf3a21fb08 R15: ffff9daf09ee2bd8 [1593913.813432] FS: 0000000000000000(0000) GS:ffff9daf3e700000(0000) knlGS:0000000000000000 [1593913.813434] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593913.813435] CR2: 00007f0a72ab8f94 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593913.813436] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593913.813438] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593913.813439] Call Trace: [1593913.813446] [] queued_spin_lock_slowpath+0xb/0xf [1593913.813451] [] _raw_spin_lock+0x20/0x30 [1593913.813487] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593913.813499] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593913.813528] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593913.813543] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593913.813559] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593913.813575] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593913.813591] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593913.813594] [] ? wake_up_state+0x20/0x20 [1593913.813610] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593913.813613] [] kthread+0xd1/0xe0 [1593913.813615] [] ? insert_kthread_work+0x40/0x40 [1593913.813618] [] ret_from_fork_nospec_begin+0x7/0x21 [1593913.813620] [] ? insert_kthread_work+0x40/0x40 [1593913.813621] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1593913.821204] NMI watchdog: BUG: soft lockup - CPU#8 stuck for 22s! [ldlm_bl_48:150612] [1593913.822198] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593913.822229] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593913.822260] CPU: 8 PID: 150612 Comm: ldlm_bl_48 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593913.822261] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593913.822263] task: ffff9dbf3823b0c0 ti: ffff9db655360000 task.ti: ffff9db655360000 [1593913.822264] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1593913.822270] RSP: 0018:ffff9db655363c70 EFLAGS: 00000246 [1593913.822271] RAX: 0000000000000000 RBX: ffff9daf1badbc00 RCX: 0000000000410000 [1593913.822272] RDX: ffff9dbf3de1b780 RSI: 0000000000610001 RDI: ffff9da30dea8058 [1593913.822273] RBP: ffff9db655363c70 R08: ffff9daf3e81b780 R09: 0000000000000000 [1593913.822274] R10: 0000000000000000 R11: fffff9032217fe80 R12: ffff9db655363c38 [1593913.822276] R13: ffff9dba122e5680 R14: ffff9dbdad264b48 R15: ffff9dba122e5718 [1593913.822277] FS: 0000000000000000(0000) GS:ffff9daf3e800000(0000) knlGS:0000000000000000 [1593913.822278] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593913.822280] CR2: 000000c000d11200 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593913.822281] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593913.822282] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593913.822283] Call Trace: [1593913.822286] [] queued_spin_lock_slowpath+0xb/0xf [1593913.822289] [] _raw_spin_lock+0x20/0x30 [1593913.822308] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593913.822317] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593913.822334] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593913.822348] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593913.822363] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593913.822378] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593913.822393] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593913.822396] [] ? wake_up_state+0x20/0x20 [1593913.822411] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593913.822413] [] kthread+0xd1/0xe0 [1593913.822415] [] ? insert_kthread_work+0x40/0x40 [1593913.822418] [] ret_from_fork_nospec_begin+0x7/0x21 [1593913.822420] [] ? insert_kthread_work+0x40/0x40 [1593913.822421] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1593913.907206] NMI watchdog: BUG: soft lockup - CPU#18 stuck for 23s! [ldlm_bl_74:150641] [1593913.907239] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593913.907271] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593913.907274] CPU: 18 PID: 150641 Comm: ldlm_bl_74 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593913.907275] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593913.907276] task: ffff9dbf38238000 ti: ffff9db7d221c000 task.ti: ffff9db7d221c000 [1593913.907283] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1593913.907284] RSP: 0018:ffff9db7d221fc70 EFLAGS: 00000246 [1593913.907284] RAX: 0000000000000000 RBX: ffff9daf1bad8140 RCX: 0000000000910000 [1593913.907285] RDX: ffff9dbf3e01b780 RSI: 0000000000a10001 RDI: ffff9da30dea8058 [1593913.907285] RBP: ffff9db7d221fc70 R08: ffff9dbf3df9b780 R09: 0000000000000000 [1593913.907286] R10: 0000000000000000 R11: fffff902fae2ae80 R12: ffff9db7d221fc38 [1593913.907286] R13: ffff9dbefa242070 R14: ffff9dbf3a7c15f8 R15: ffff9dbefa242108 [1593913.907287] FS: 0000000000000000(0000) GS:ffff9dbf3df80000(0000) knlGS:0000000000000000 [1593913.907288] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593913.907289] CR2: 00007f57c0025008 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593913.907289] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593913.907290] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593913.907290] Call Trace: [1593913.907295] [] queued_spin_lock_slowpath+0xb/0xf [1593913.907299] [] _raw_spin_lock+0x20/0x30 [1593913.907329] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593913.907339] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593913.907363] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593913.907377] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593913.907392] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593913.907407] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593913.907422] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593913.907425] [] ? wake_up_state+0x20/0x20 [1593913.907440] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593913.907442] [] kthread+0xd1/0xe0 [1593913.907444] [] ? insert_kthread_work+0x40/0x40 [1593913.907446] [] ret_from_fork_nospec_begin+0x7/0x21 [1593913.907447] [] ? insert_kthread_work+0x40/0x40 [1593913.907461] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1593921.816267] NMI watchdog: BUG: soft lockup - CPU#6 stuck for 22s! [ldlm_bl_115:150683] [1593921.817148] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593921.817186] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593921.817225] CPU: 6 PID: 150683 Comm: ldlm_bl_115 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593921.817227] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593921.817228] task: ffff9dbf390630c0 ti: ffff9dba86fd8000 task.ti: ffff9dba86fd8000 [1593921.817230] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1593921.817239] RSP: 0018:ffff9dba86fdbc70 EFLAGS: 00000246 [1593921.817240] RAX: 0000000000000000 RBX: ffff9daf13229cc0 RCX: 0000000000310000 [1593921.817241] RDX: ffff9daf3e75b780 RSI: 0000000000290001 RDI: ffff9da30dea8058 [1593921.817242] RBP: ffff9dba86fdbc70 R08: ffff9daf3e79b780 R09: 0000000000000000 [1593921.817243] R10: 0000000000000000 R11: fffff902d66cfa00 R12: ffff9dba86fdbc38 [1593921.817244] R13: ffff9db748a1e150 R14: ffff9db7d3ed4008 R15: ffff9db748a1e1e8 [1593921.817245] FS: 0000000000000000(0000) GS:ffff9daf3e780000(0000) knlGS:0000000000000000 [1593921.817247] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593921.817248] CR2: 0000000001b30658 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593921.817249] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593921.817250] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593921.817251] Call Trace: [1593921.817258] [] queued_spin_lock_slowpath+0xb/0xf [1593921.817263] [] _raw_spin_lock+0x20/0x30 [1593921.817302] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593921.817315] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593921.817343] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593921.817357] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593921.817373] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593921.817388] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593921.817402] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593921.817406] [] ? wake_up_state+0x20/0x20 [1593921.817420] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593921.817423] [] kthread+0xd1/0xe0 [1593921.817425] [] ? insert_kthread_work+0x40/0x40 [1593921.817428] [] ret_from_fork_nospec_begin+0x7/0x21 [1593921.817430] [] ? insert_kthread_work+0x40/0x40 [1593921.817431] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1593921.912267] NMI watchdog: BUG: soft lockup - CPU#20 stuck for 22s! [ldlm_bl_43:150605] [1593921.912299] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593921.912331] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593921.912334] CPU: 20 PID: 150605 Comm: ldlm_bl_43 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593921.912334] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593921.912335] task: ffff9dbb883f5140 ti: ffff9dbb4467c000 task.ti: ffff9dbb4467c000 [1593921.912342] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x15b/0x200 [1593921.912343] RSP: 0018:ffff9dbb4467fc70 EFLAGS: 00000202 [1593921.912344] RAX: 0000000000000001 RBX: ffff9daf13228f00 RCX: 0000000000a10000 [1593921.912344] RDX: 0000000000690001 RSI: 0000000000310001 RDI: ffff9da30dea8058 [1593921.912345] RBP: ffff9dbb4467fc70 R08: ffff9dbf3e01b780 R09: ffff9dbf3e05b780 [1593921.912345] R10: 0000000000000000 R11: fffff902d9af6280 R12: ffff9dbb4467fc38 [1593921.912346] R13: ffff9dbe8baf8ad0 R14: ffff9dba6f2ca758 R15: ffff9dbe8baf8b68 [1593921.912347] FS: 0000000000000000(0000) GS:ffff9dbf3e000000(0000) knlGS:0000000000000000 [1593921.912347] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593921.912348] CR2: 000000c000de5000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593921.912349] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593921.912349] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593921.912350] Call Trace: [1593921.912354] [] queued_spin_lock_slowpath+0xb/0xf [1593921.912358] [] _raw_spin_lock+0x20/0x30 [1593921.912388] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593921.912397] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593921.912421] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593921.912435] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593921.912450] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593921.912464] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593921.912478] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593921.912480] [] ? wake_up_state+0x20/0x20 [1593921.912494] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593921.912496] [] kthread+0xd1/0xe0 [1593921.912497] [] ? insert_kthread_work+0x40/0x40 [1593921.912499] [] ret_from_fork_nospec_begin+0x7/0x21 [1593921.912501] [] ? insert_kthread_work+0x40/0x40 [1593921.912514] Code: 74 04 41 0f 18 09 8b 17 0f b7 c2 85 c0 74 21 83 f8 03 75 10 eb 1a 66 2e 0f 1f 84 00 00 00 00 00 85 c0 74 0c f3 90 8b 17 0f b7 c2 <83> f8 03 75 f0 be 01 00 00 00 eb 15 66 0f 1f 84 00 00 00 00 00 [1593933.894358] NMI watchdog: BUG: soft lockup - CPU#13 stuck for 22s! [ldlm_bl_28:148141] [1593933.895470] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1593933.895511] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1593933.895553] CPU: 13 PID: 148141 Comm: ldlm_bl_28 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1593933.895555] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1593933.895557] task: ffff9daf17be30c0 ti: ffff9dac518c4000 task.ti: ffff9dac518c4000 [1593933.895558] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1593933.895566] RSP: 0018:ffff9dac518c7c70 EFLAGS: 00000246 [1593933.895567] RAX: 0000000000000000 RBX: ffff9daec4dcdf40 RCX: 0000000000690000 [1593933.895569] RDX: ffff9daf3e81b780 RSI: 0000000000410001 RDI: ffff9da30dea8058 [1593933.895570] RBP: ffff9dac518c7c70 R08: ffff9dbf3de5b780 R09: 0000000000000000 [1593933.895571] R10: 0000000000000000 R11: fffff90340bdd680 R12: ffff9dac518c7c38 [1593933.895572] R13: ffff9db645af2070 R14: ffff9dba6ff5bb08 R15: ffff9db645af2108 [1593933.895573] FS: 0000000000000000(0000) GS:ffff9dbf3de40000(0000) knlGS:0000000000000000 [1593933.895575] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1593933.895576] CR2: 00007fad55587000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1593933.895577] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1593933.895578] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1593933.895579] Call Trace: [1593933.895585] [] queued_spin_lock_slowpath+0xb/0xf [1593933.895590] [] _raw_spin_lock+0x20/0x30 [1593933.895623] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1593933.895634] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1593933.895659] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1593933.895673] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1593933.895689] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1593933.895704] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1593933.895718] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1593933.895721] [] ? wake_up_state+0x20/0x20 [1593933.895735] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1593933.895738] [] kthread+0xd1/0xe0 [1593933.895740] [] ? insert_kthread_work+0x40/0x40 [1593933.895743] [] ret_from_fork_nospec_begin+0x7/0x21 [1593933.895744] [] ? insert_kthread_work+0x40/0x40 [1593933.895745] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1593935.864751] Lustre: ai400-OST0001-osc-ffff9dbf39146800: Connection restored to 10.0.10.175@o2ib10 (at 10.0.10.175@o2ib10) [1593936.805143] Lustre: ai400-OST0003-osc-ffff9dbf39146800: Connection restored to 10.0.10.176@o2ib10 (at 10.0.10.176@o2ib10) [1593936.805148] Lustre: Skipped 2 previous similar messages [1594593.051312] Lustre: Unmounted ai400-client [1594621.525342] LNet: Removed LNI 10.0.13.150@o2ib10 [1594966.193648] LNet: HW NUMA nodes: 2, HW CPU cores: 24, npartitions: 2 [1594966.194960] alg: No test for adler32 (adler32-zlib) [1594966.985590] Lustre: Lustre: Build Version: 2.12.58_145_gfcf219d [1594967.051890] LNet: 153683:0:(config.c:1641:lnet_inet_enumerate()) lnet: Ignoring interface dummy0: it's down [1594967.051908] LNet: Using FMR for registration [1594967.061158] LNet: Added LNI 10.0.13.150@o2ib10 [8/256/0/180] [1594968.254442] Lustre: Mounted ai400-client [1595577.825018] NMI watchdog: BUG: soft lockup - CPU#11 stuck for 22s! [ldlm_bl_19:155073] [1595577.826067] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1595577.826109] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1595577.826150] CPU: 11 PID: 155073 Comm: ldlm_bl_19 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1595577.826151] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1595577.826154] task: ffff9db93afac100 ti: ffff9dbed43a8000 task.ti: ffff9dbed43a8000 [1595577.826155] RIP: 0010:[] [] cl_object_top+0xe6/0x110 [obdclass] [1595577.826187] RSP: 0018:ffff9dbed43abc60 EFLAGS: 00000287 [1595577.826189] RAX: ffff9dbf2a31b5a0 RBX: ffff9dbed43abc38 RCX: ffff9dad22b28040 [1595577.826190] RDX: ffff9dacf4b18b20 RSI: ffff9dad22b2fac0 RDI: ffff9dad22b2fac0 [1595577.826191] RBP: ffff9dbed43abc68 R08: 000000017166a000 R09: ffff9dacf4b18ad0 [1595577.826192] R10: 0000000000000000 R11: fffff902d39aff80 R12: ffff9da876344a28 [1595577.826193] R13: ffff9dacf4b19638 R14: 0000000000015527 R15: ffff9dad22b2fac0 [1595577.826195] FS: 0000000000000000(0000) GS:ffff9daf3e8c0000(0000) knlGS:0000000000000000 [1595577.826196] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1595577.826197] CR2: 00007f91bcb4cf94 CR3: 0000001037ea8000 CR4: 00000000003607e0 [1595577.826199] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1595577.826200] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1595577.826201] Call Trace: [1595577.826218] [] cl_object_attr_update+0x26/0x150 [obdclass] [1595577.826230] [] osc_ldlm_blocking_ast+0x322/0x3a0 [osc] [1595577.826256] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1595577.826270] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1595577.826286] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1595577.826301] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1595577.826317] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1595577.826323] [] ? wake_up_state+0x20/0x20 [1595577.826338] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1595577.826343] [] kthread+0xd1/0xe0 [1595577.826345] [] ? insert_kthread_work+0x40/0x40 [1595577.826350] [] ret_from_fork_nospec_begin+0x7/0x21 [1595577.826352] [] ? insert_kthread_work+0x40/0x40 [1595577.826353] Code: c7 05 db 3f 10 00 00 00 00 00 e8 96 64 ea ff 48 89 d8 5b 5d c3 e8 da 20 01 00 48 81 fb 00 f0 ff ff 0f 87 71 ff ff ff 48 8b 43 08 <48> 8b 40 08 f6 00 04 0f 85 60 ff ff ff e8 81 20 01 00 31 db e9 [1595709.796451] Lustre: Unmounted ai400-client [1595741.572883] LNet: Removed LNI 10.0.13.150@o2ib10 [1595965.920161] LNet: HW NUMA nodes: 2, HW CPU cores: 24, npartitions: 2 [1595965.921689] alg: No test for adler32 (adler32-zlib) [1595966.712366] Lustre: Lustre: Build Version: 2.12.58_145_gfcf219d [1595966.777254] LNet: 156627:0:(config.c:1641:lnet_inet_enumerate()) lnet: Ignoring interface dummy0: it's down [1595966.777271] LNet: Using FMR for registration [1595966.785817] LNet: Added LNI 10.0.13.150@o2ib10 [8/256/0/180] [1595967.977611] Lustre: Mounted ai400-client [1596493.814317] NMI watchdog: BUG: soft lockup - CPU#4 stuck for 22s! [ldlm_bl_14:157390] [1596493.815213] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1596493.815254] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1596493.815294] CPU: 4 PID: 157390 Comm: ldlm_bl_14 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1596493.815296] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1596493.815298] task: ffff9dba42f84100 ti: ffff9dbf2ee48000 task.ti: ffff9dbf2ee48000 [1596493.815299] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1596493.815309] RSP: 0018:ffff9dbf2ee4bc70 EFLAGS: 00000246 [1596493.815310] RAX: 0000000000000000 RBX: ffff9dbf2823a800 RCX: 0000000000210000 [1596493.815311] RDX: ffff9dbf3dfdb780 RSI: 0000000000990001 RDI: ffff9db13d2ec058 [1596493.815312] RBP: ffff9dbf2ee4bc70 R08: ffff9daf3e71b780 R09: 0000000000000000 [1596493.815313] R10: 0000000000000000 R11: fffff9032c95bc00 R12: ffff9dbf2ee4bc38 [1596493.815314] R13: ffff9da742a995a0 R14: ffff9dacf4bf30e8 R15: ffff9da742a99638 [1596493.815315] FS: 0000000000000000(0000) GS:ffff9daf3e700000(0000) knlGS:0000000000000000 [1596493.815316] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1596493.815317] CR2: 0000000002968000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1596493.815319] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1596493.815320] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1596493.815321] Call Trace: [1596493.815328] [] queued_spin_lock_slowpath+0xb/0xf [1596493.815334] [] _raw_spin_lock+0x20/0x30 [1596493.815363] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1596493.815374] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1596493.815398] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1596493.815413] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1596493.815429] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1596493.815444] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1596493.815460] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1596493.815464] [] ? wake_up_state+0x20/0x20 [1596493.815479] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1596493.815483] [] kthread+0xd1/0xe0 [1596493.815485] [] ? insert_kthread_work+0x40/0x40 [1596493.815488] [] ret_from_fork_nospec_begin+0x7/0x21 [1596493.815490] [] ? insert_kthread_work+0x40/0x40 [1596493.815491] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1596509.896462] NMI watchdog: BUG: soft lockup - CPU#13 stuck for 22s! [ldlm_bl_13:157389] [1596509.897438] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1596509.897479] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1596509.897520] CPU: 13 PID: 157389 Comm: ldlm_bl_13 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1596509.897522] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1596509.897523] task: ffff9dba42f85140 ti: ffff9dbf276d4000 task.ti: ffff9dbf276d4000 [1596509.897525] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1596509.897533] RSP: 0018:ffff9dbf276d7c70 EFLAGS: 00000246 [1596509.897534] RAX: 0000000000000000 RBX: ffff9dbf2ab0d900 RCX: 0000000000690000 [1596509.897535] RDX: ffff9dbf3df9b780 RSI: 0000000000910001 RDI: ffff9db13d2ec058 [1596509.897536] RBP: ffff9dbf276d7c70 R08: ffff9dbf3de5b780 R09: 0000000000000000 [1596509.897537] R10: 0000000000000000 R11: fffff902dfe0b200 R12: ffff9dbf276d7c38 [1596509.897538] R13: ffff9da952e32070 R14: ffff9db31cef1208 R15: ffff9da952e32108 [1596509.897539] FS: 0000000000000000(0000) GS:ffff9dbf3de40000(0000) knlGS:0000000000000000 [1596509.897540] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1596509.897542] CR2: 00007fd93e4c33e0 CR3: 000000153ac10000 CR4: 00000000003607e0 [1596509.897543] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1596509.897544] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1596509.897545] Call Trace: [1596509.897551] [] queued_spin_lock_slowpath+0xb/0xf [1596509.897555] [] _raw_spin_lock+0x20/0x30 [1596509.897591] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1596509.897603] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1596509.897633] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1596509.897648] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1596509.897664] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1596509.897680] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1596509.897696] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1596509.897699] [] ? wake_up_state+0x20/0x20 [1596509.897715] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1596509.897718] [] kthread+0xd1/0xe0 [1596509.897720] [] ? insert_kthread_work+0x40/0x40 [1596509.897723] [] ret_from_fork_nospec_begin+0x7/0x21 [1596509.897725] [] ? insert_kthread_work+0x40/0x40 [1596509.897726] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1596521.814569] NMI watchdog: BUG: soft lockup - CPU#4 stuck for 22s! [ldlm_bl_14:157390] [1596521.815479] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1596521.815518] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1596521.815559] CPU: 4 PID: 157390 Comm: ldlm_bl_14 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1596521.815560] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1596521.815562] task: ffff9dba42f84100 ti: ffff9dbf2ee48000 task.ti: ffff9dbf2ee48000 [1596521.815563] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x158/0x200 [1596521.815573] RSP: 0018:ffff9dbf2ee4bc70 EFLAGS: 00000202 [1596521.815574] RAX: 0000000000000001 RBX: ffff9dbf2823b200 RCX: 0000000000210000 [1596521.815575] RDX: 0000000000190001 RSI: 0000000000810001 RDI: ffff9db13d2ec058 [1596521.815576] RBP: ffff9dbf2ee4bc70 R08: ffff9daf3e71b780 R09: ffff9dbf3e05b780 [1596521.815577] R10: 0000000000000000 R11: fffff902de72b380 R12: ffff9dbf2ee4bc38 [1596521.815578] R13: ffff9da742a995a0 R14: ffff9dacf4bf30e8 R15: ffff9da742a99638 [1596521.815580] FS: 0000000000000000(0000) GS:ffff9daf3e700000(0000) knlGS:0000000000000000 [1596521.815581] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1596521.815582] CR2: 0000000002968000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1596521.815583] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1596521.815584] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1596521.815586] Call Trace: [1596521.815592] [] queued_spin_lock_slowpath+0xb/0xf [1596521.815597] [] _raw_spin_lock+0x20/0x30 [1596521.815628] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1596521.815639] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1596521.815665] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1596521.815679] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1596521.815695] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1596521.815711] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1596521.815727] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1596521.815731] [] ? wake_up_state+0x20/0x20 [1596521.815746] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1596521.815749] [] kthread+0xd1/0xe0 [1596521.815752] [] ? insert_kthread_work+0x40/0x40 [1596521.815755] [] ret_from_fork_nospec_begin+0x7/0x21 [1596521.815757] [] ? insert_kthread_work+0x40/0x40 [1596521.815758] Code: 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 85 c0 74 21 83 f8 03 75 10 eb 1a 66 2e 0f 1f 84 00 00 00 00 00 85 c0 74 0c f3 90 8b 17 <0f> b7 c2 83 f8 03 75 f0 be 01 00 00 00 eb 15 66 0f 1f 84 00 00 [1596521.904569] NMI watchdog: BUG: soft lockup - CPU#16 stuck for 22s! [ldlm_bl_01:156714] [1596521.905531] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1596521.905572] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1596521.905612] CPU: 16 PID: 156714 Comm: ldlm_bl_01 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1596521.905614] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1596521.905616] task: ffff9daf1b2aa080 ti: ffff9dbefea50000 task.ti: ffff9dbefea50000 [1596521.905617] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1596521.905626] RSP: 0018:ffff9dbefea53c70 EFLAGS: 00000246 [1596521.905627] RAX: 0000000000000000 RBX: ffff9db83e3bf200 RCX: 0000000000810000 [1596521.905628] RDX: ffff9dbf3e01b780 RSI: 0000000000a10001 RDI: ffff9db13d2ec058 [1596521.905629] RBP: ffff9dbefea53c70 R08: ffff9dbf3df1b780 R09: 0000000000000000 [1596521.905630] R10: 0000000000000000 R11: fffff902fdf1d200 R12: ffff9dbefea53c38 [1596521.905631] R13: ffff9dbbc1eb0000 R14: ffff9dbbb2722008 R15: ffff9dbbc1eb0098 [1596521.905633] FS: 0000000000000000(0000) GS:ffff9dbf3df00000(0000) knlGS:0000000000000000 [1596521.905634] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1596521.905635] CR2: 00007f2865b5e3e0 CR3: 000000202879c000 CR4: 00000000003607e0 [1596521.905636] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1596521.905637] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1596521.905638] Call Trace: [1596521.905644] [] queued_spin_lock_slowpath+0xb/0xf [1596521.905649] [] _raw_spin_lock+0x20/0x30 [1596521.905683] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1596521.905695] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1596521.905724] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1596521.905738] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1596521.905754] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1596521.905769] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1596521.905784] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1596521.905787] [] ? wake_up_state+0x20/0x20 [1596521.905801] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1596521.905805] [] kthread+0xd1/0xe0 [1596521.905807] [] ? insert_kthread_work+0x40/0x40 [1596521.905810] [] ret_from_fork_nospec_begin+0x7/0x21 [1596521.905811] [] ? insert_kthread_work+0x40/0x40 [1596521.905812] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1596525.916605] NMI watchdog: BUG: soft lockup - CPU#21 stuck for 22s! [ldlm_bl_16:157400] [1596525.917578] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1596525.917619] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1596525.917661] CPU: 21 PID: 157400 Comm: ldlm_bl_16 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1596525.917663] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1596525.917665] task: ffff9da068908000 ti: ffff9dbf3bc0c000 task.ti: ffff9dbf3bc0c000 [1596525.917666] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1596525.917675] RSP: 0000:ffff9dbf3bc0fc70 EFLAGS: 00000246 [1596525.917676] RAX: 0000000000000000 RBX: ffff9db83e3bd2c0 RCX: 0000000000a90000 [1596525.917677] RDX: ffff9daf3e75b780 RSI: 0000000000290001 RDI: ffff9db13d2ec058 [1596525.917678] RBP: ffff9dbf3bc0fc70 R08: ffff9dbf3e05b780 R09: 0000000000000000 [1596525.917679] R10: 0000000000000000 R11: fffff902f6f9c780 R12: ffff9dbf3bc0fc38 [1596525.917680] R13: ffff9db6b7e82b40 R14: ffff9dbbb2723e68 R15: ffff9db6b7e82bd8 [1596525.917681] FS: 0000000000000000(0000) GS:ffff9dbf3e040000(0000) knlGS:0000000000000000 [1596525.917683] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1596525.917684] CR2: 0000000000402f90 CR3: 000000153ac10000 CR4: 00000000003607e0 [1596525.917685] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1596525.917686] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1596525.917687] Call Trace: [1596525.917693] [] queued_spin_lock_slowpath+0xb/0xf [1596525.917697] [] _raw_spin_lock+0x20/0x30 [1596525.917736] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1596525.917748] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1596525.917779] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1596525.917793] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1596525.917810] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1596525.917825] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1596525.917840] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1596525.917843] [] ? wake_up_state+0x20/0x20 [1596525.917858] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1596525.917860] [] kthread+0xd1/0xe0 [1596525.917862] [] ? insert_kthread_work+0x40/0x40 [1596525.917865] [] ret_from_fork_nospec_begin+0x7/0x21 [1596525.917867] [] ? insert_kthread_work+0x40/0x40 [1596525.917868] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1596528.583627] INFO: rcu_sched self-detected stall on CPU { 4} (t=60000 jiffies g=21153661 c=21153660 q=282292) [1596528.584738] Task dump for CPU 4: [1596528.584740] ldlm_bl_14 R running task 0 157390 2 0x00000088 [1596528.584743] Call Trace: [1596528.584745] [] sched_show_task+0xa8/0x110 [1596528.584755] [] dump_cpu_task+0x39/0x70 [1596528.584759] [] rcu_dump_cpu_stacks+0x90/0xd0 [1596528.584761] [] rcu_check_callbacks+0x442/0x730 [1596528.584766] [] ? tick_sched_do_timer+0x50/0x50 [1596528.584771] [] update_process_times+0x46/0x80 [1596528.584773] [] tick_sched_handle+0x30/0x70 [1596528.584775] [] tick_sched_timer+0x39/0x80 [1596528.584781] [] __hrtimer_run_queues+0xf3/0x270 [1596528.584783] [] hrtimer_interrupt+0xaf/0x1d0 [1596528.584790] [] local_apic_timer_interrupt+0x3b/0x60 [1596528.584794] [] smp_apic_timer_interrupt+0x43/0x60 [1596528.584798] [] apic_timer_interrupt+0x162/0x170 [1596528.584800] [] ? native_queued_spin_lock_slowpath+0x122/0x200 [1596528.584807] [] queued_spin_lock_slowpath+0xb/0xf [1596528.584811] [] _raw_spin_lock+0x20/0x30 [1596528.584844] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1596528.584855] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1596528.584879] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1596528.584893] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1596528.584909] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1596528.584925] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1596528.584941] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1596528.584943] [] ? wake_up_state+0x20/0x20 [1596528.584958] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1596528.584961] [] kthread+0xd1/0xe0 [1596528.584963] [] ? insert_kthread_work+0x40/0x40 [1596528.584966] [] ret_from_fork_nospec_begin+0x7/0x21 [1596528.584968] [] ? insert_kthread_work+0x40/0x40 [1596529.894641] NMI watchdog: BUG: soft lockup - CPU#12 stuck for 24s! [ldlm_bl_17:157401] [1596529.895651] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1596529.895692] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1596529.895733] CPU: 12 PID: 157401 Comm: ldlm_bl_17 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1596529.895735] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1596529.895736] task: ffff9daf382e2080 ti: ffff9dbc3eeec000 task.ti: ffff9dbc3eeec000 [1596529.895738] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1596529.895746] RSP: 0018:ffff9dbc3eeefc70 EFLAGS: 00000246 [1596529.895747] RAX: 0000000000000000 RBX: ffff9dba0c61c780 RCX: 0000000000610000 [1596529.895748] RDX: ffff9daf3e81b780 RSI: 0000000000410001 RDI: ffff9db13d2ec058 [1596529.895749] RBP: ffff9dbc3eeefc70 R08: ffff9dbf3de1b780 R09: 0000000000000000 [1596529.895750] R10: 0000000000000000 R11: fffff902f977cf00 R12: ffff9dbc3eeefc38 [1596529.895751] R13: ffff9da952e30ad0 R14: ffff9db31cef0bd8 R15: ffff9da952e30b68 [1596529.895753] FS: 0000000000000000(0000) GS:ffff9dbf3de00000(0000) knlGS:0000000000000000 [1596529.895754] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1596529.895755] CR2: 00007f2af6cd43e0 CR3: 000000153ac10000 CR4: 00000000003607e0 [1596529.895756] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1596529.895757] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1596529.895758] Call Trace: [1596529.895764] [] queued_spin_lock_slowpath+0xb/0xf [1596529.895769] [] _raw_spin_lock+0x20/0x30 [1596529.895806] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1596529.895818] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1596529.895849] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1596529.895863] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1596529.895880] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1596529.895896] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1596529.895912] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1596529.895915] [] ? wake_up_state+0x20/0x20 [1596529.895931] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1596529.895934] [] kthread+0xd1/0xe0 [1596529.895936] [] ? insert_kthread_work+0x40/0x40 [1596529.895939] [] ret_from_fork_nospec_begin+0x7/0x21 [1596529.895941] [] ? insert_kthread_work+0x40/0x40 [1596529.895942] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1596537.812710] NMI watchdog: BUG: soft lockup - CPU#3 stuck for 23s! [ldlm_bl_20:157410] [1596537.813657] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1596537.813698] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1596537.813739] CPU: 3 PID: 157410 Comm: ldlm_bl_20 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1596537.813740] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1596537.813742] task: ffff9dafe91d6180 ti: ffff9db756218000 task.ti: ffff9db756218000 [1596537.813744] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1596537.813753] RSP: 0018:ffff9db75621bc70 EFLAGS: 00000246 [1596537.813754] RAX: 0000000000000000 RBX: ffff9dbf2ab0de00 RCX: 0000000000190000 [1596537.813756] RDX: ffff9daf3e75b780 RSI: 0000000000290001 RDI: ffff9db13d2ec058 [1596537.813757] RBP: ffff9db75621bc70 R08: ffff9daf3e6db780 R09: 0000000000000000 [1596537.813758] R10: 0000000000000000 R11: fffff902ff62e280 R12: ffff9db75621bc38 [1596537.813758] R13: ffff9dbef72eab40 R14: ffff9dbcf820ca28 R15: ffff9dbef72eabd8 [1596537.813760] FS: 0000000000000000(0000) GS:ffff9daf3e6c0000(0000) knlGS:0000000000000000 [1596537.813761] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1596537.813762] CR2: 0000000000cdb000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1596537.813764] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1596537.813765] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1596537.813766] Call Trace: [1596537.813773] [] queued_spin_lock_slowpath+0xb/0xf [1596537.813778] [] _raw_spin_lock+0x20/0x30 [1596537.813809] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1596537.813820] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1596537.813847] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1596537.813862] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1596537.813878] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1596537.813894] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1596537.813910] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1596537.813914] [] ? wake_up_state+0x20/0x20 [1596537.813929] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1596537.813933] [] kthread+0xd1/0xe0 [1596537.813936] [] ? insert_kthread_work+0x40/0x40 [1596537.813939] [] ret_from_fork_nospec_begin+0x7/0x21 [1596537.813941] [] ? insert_kthread_work+0x40/0x40 [1596537.813942] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1596537.896710] NMI watchdog: BUG: soft lockup - CPU#13 stuck for 22s! [ldlm_bl_13:157389] [1596537.897700] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1596537.897740] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1596537.897780] CPU: 13 PID: 157389 Comm: ldlm_bl_13 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1596537.897782] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1596537.897783] task: ffff9dba42f85140 ti: ffff9dbf276d4000 task.ti: ffff9dbf276d4000 [1596537.897785] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1596537.897792] RSP: 0018:ffff9dbf276d7c70 EFLAGS: 00000246 [1596537.897793] RAX: 0000000000000000 RBX: ffff9dbf3727a080 RCX: 0000000000690000 [1596537.897794] RDX: ffff9dbf3dfdb780 RSI: 0000000000990001 RDI: ffff9db13d2ec058 [1596537.897795] RBP: ffff9dbf276d7c70 R08: ffff9dbf3de5b780 R09: 0000000000000000 [1596537.897796] R10: 0000000000000000 R11: fffff902d3378f80 R12: ffff9dbf276d7c38 [1596537.897797] R13: ffff9da952e32070 R14: ffff9db31cef1208 R15: ffff9da952e32108 [1596537.897799] FS: 0000000000000000(0000) GS:ffff9dbf3de40000(0000) knlGS:0000000000000000 [1596537.897800] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1596537.897801] CR2: 00007fd93e4c33e0 CR3: 000000153ac10000 CR4: 00000000003607e0 [1596537.897802] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1596537.897803] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1596537.897804] Call Trace: [1596537.897809] [] queued_spin_lock_slowpath+0xb/0xf [1596537.897814] [] _raw_spin_lock+0x20/0x30 [1596537.897850] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1596537.897863] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1596537.897894] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1596537.897909] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1596537.897925] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1596537.897941] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1596537.897957] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1596537.897960] [] ? wake_up_state+0x20/0x20 [1596537.897975] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1596537.897978] [] kthread+0xd1/0xe0 [1596537.897980] [] ? insert_kthread_work+0x40/0x40 [1596537.897983] [] ret_from_fork_nospec_begin+0x7/0x21 [1596537.897985] [] ? insert_kthread_work+0x40/0x40 [1596537.897986] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1596549.904795] NMI watchdog: BUG: soft lockup - CPU#16 stuck for 22s! [ldlm_bl_01:156714] [1596549.905933] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1596549.905974] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1596549.906016] CPU: 16 PID: 156714 Comm: ldlm_bl_01 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1596549.906017] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1596549.906019] task: ffff9daf1b2aa080 ti: ffff9dbefea50000 task.ti: ffff9dbefea50000 [1596549.906021] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x1ce/0x200 [1596549.906028] RSP: 0018:ffff9dbefea53d40 EFLAGS: 00000202 [1596549.906029] RAX: 0000000000000001 RBX: 0000000000000000 RCX: 0000000000000001 [1596549.906030] RDX: 0000000000000101 RSI: 0000000000000001 RDI: ffff9dbc20b100dc [1596549.906032] RBP: ffff9dbefea53d40 R08: 0000000000000101 R09: 0000000100400016 [1596549.906033] R10: 00000000f82ce301 R11: fffff902dfe0b380 R12: ffff9da6f82cf200 [1596549.906034] R13: 00000000c3e4f774 R14: 0000000000000246 R15: ffff9da6f82bed08 [1596549.906036] FS: 0000000000000000(0000) GS:ffff9dbf3df00000(0000) knlGS:0000000000000000 [1596549.906037] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1596549.906038] CR2: 00007f2865b5e3e0 CR3: 000000153ac10000 CR4: 00000000003607e0 [1596549.906040] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1596549.906041] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1596549.906042] Call Trace: [1596549.906047] [] queued_spin_lock_slowpath+0xb/0xf [1596549.906052] [] _raw_spin_lock+0x20/0x30 [1596549.906087] [] lock_res_and_lock+0x2c/0x50 [ptlrpc] [1596549.906103] [] ldlm_cli_cancel_local+0x7a/0x3f0 [ptlrpc] [1596549.906119] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1596549.906135] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1596549.906139] [] ? wake_up_state+0x20/0x20 [1596549.906154] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1596549.906158] [] kthread+0xd1/0xe0 [1596549.906160] [] ? insert_kthread_work+0x40/0x40 [1596549.906163] [] ret_from_fork_nospec_begin+0x7/0x21 [1596549.906165] [] ? insert_kthread_work+0x40/0x40 [1596549.906166] Code: 37 81 fe 00 01 00 00 74 f4 e9 93 fe ff ff 0f 1f 80 00 00 00 00 83 fa 01 75 11 0f 1f 00 e9 68 fe ff ff 0f 1f 00 85 c0 74 0c f3 90 <8b> 07 0f b6 c0 83 f8 03 75 f0 b8 01 00 00 00 66 89 07 5d c3 66 [1596553.814822] NMI watchdog: BUG: soft lockup - CPU#4 stuck for 22s! [ldlm_bl_14:157390] [1596553.815742] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1596553.815782] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1596553.815823] CPU: 4 PID: 157390 Comm: ldlm_bl_14 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1596553.815824] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1596553.815826] task: ffff9dba42f84100 ti: ffff9dbf2ee48000 task.ti: ffff9dbf2ee48000 [1596553.815827] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1596553.815837] RSP: 0018:ffff9dbf2ee4bc70 EFLAGS: 00000246 [1596553.815838] RAX: 0000000000000000 RBX: ffff9db83e3bf980 RCX: 0000000000210000 [1596553.815839] RDX: ffff9daf3e7db780 RSI: 0000000000390001 RDI: ffff9db13d2ec058 [1596553.815840] RBP: ffff9dbf2ee4bc70 R08: ffff9daf3e71b780 R09: 0000000000000000 [1596553.815841] R10: 0000000000000000 R11: fffff9032c7aeb80 R12: ffff9dbf2ee4bc38 [1596553.815843] R13: ffff9da742a995a0 R14: ffff9dacf4bf30e8 R15: ffff9da742a99638 [1596553.815844] FS: 0000000000000000(0000) GS:ffff9daf3e700000(0000) knlGS:0000000000000000 [1596553.815845] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1596553.815846] CR2: 0000000002968000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1596553.815848] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1596553.815849] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1596553.815850] Call Trace: [1596553.815856] [] queued_spin_lock_slowpath+0xb/0xf [1596553.815862] [] _raw_spin_lock+0x20/0x30 [1596553.815893] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1596553.815904] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1596553.815931] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1596553.815945] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1596553.815961] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1596553.815975] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1596553.815990] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1596553.815994] [] ? wake_up_state+0x20/0x20 [1596553.816008] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1596553.816011] [] kthread+0xd1/0xe0 [1596553.816013] [] ? insert_kthread_work+0x40/0x40 [1596553.816015] [] ret_from_fork_nospec_begin+0x7/0x21 [1596553.816017] [] ? insert_kthread_work+0x40/0x40 [1596553.816018] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1596553.916823] NMI watchdog: BUG: soft lockup - CPU#21 stuck for 22s! [ldlm_bl_16:157400] [1596553.917771] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1596553.917810] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1596553.917851] CPU: 21 PID: 157400 Comm: ldlm_bl_16 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1596553.917853] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1596553.917855] task: ffff9da068908000 ti: ffff9dbf3bc0c000 task.ti: ffff9dbf3bc0c000 [1596553.917856] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1596553.917864] RSP: 0000:ffff9dbf3bc0fc70 EFLAGS: 00000246 [1596553.917865] RAX: 0000000000000000 RBX: ffff9dba0c61fe80 RCX: 0000000000a90000 [1596553.917866] RDX: ffff9daf3e81b780 RSI: 0000000000410001 RDI: ffff9db13d2ec058 [1596553.917867] RBP: ffff9dbf3bc0fc70 R08: ffff9dbf3e05b780 R09: 0000000000000000 [1596553.917868] R10: 0000000000000000 R11: fffff902f9c9de80 R12: ffff9dbf3bc0fc38 [1596553.917869] R13: ffff9db6b7e82b40 R14: ffff9dbbb2723e68 R15: ffff9db6b7e82bd8 [1596553.917870] FS: 0000000000000000(0000) GS:ffff9dbf3e040000(0000) knlGS:0000000000000000 [1596553.917872] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1596553.917872] CR2: 0000000000402f90 CR3: 000000153ac10000 CR4: 00000000003607e0 [1596553.917874] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1596553.917875] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1596553.917876] Call Trace: [1596553.917881] [] queued_spin_lock_slowpath+0xb/0xf [1596553.917885] [] _raw_spin_lock+0x20/0x30 [1596553.917925] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1596553.917937] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1596553.917965] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1596553.917980] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1596553.917996] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1596553.918012] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1596553.918028] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1596553.918032] [] ? wake_up_state+0x20/0x20 [1596553.918047] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1596553.918051] [] kthread+0xd1/0xe0 [1596553.918053] [] ? insert_kthread_work+0x40/0x40 [1596553.918056] [] ret_from_fork_nospec_begin+0x7/0x21 [1596553.918058] [] ? insert_kthread_work+0x40/0x40 [1596553.918059] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1596557.894850] NMI watchdog: BUG: soft lockup - CPU#12 stuck for 22s! [ldlm_bl_17:157401] [1596557.895792] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1596557.895833] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1596557.895873] CPU: 12 PID: 157401 Comm: ldlm_bl_17 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1596557.895874] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1596557.895876] task: ffff9daf382e2080 ti: ffff9dbc3eeec000 task.ti: ffff9dbc3eeec000 [1596557.895878] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1596557.895885] RSP: 0018:ffff9dbc3eeefc70 EFLAGS: 00000246 [1596557.895887] RAX: 0000000000000000 RBX: ffff9db83e3bebc0 RCX: 0000000000610000 [1596557.895888] RDX: ffff9daf3e65b780 RSI: 0000000000090001 RDI: ffff9db13d2ec058 [1596557.895889] RBP: ffff9dbc3eeefc70 R08: ffff9dbf3de1b780 R09: 0000000000000000 [1596557.895890] R10: 0000000000000000 R11: fffff90330fcf500 R12: ffff9dbc3eeefc38 [1596557.895891] R13: ffff9da952e30ad0 R14: ffff9db31cef0bd8 R15: ffff9da952e30b68 [1596557.895892] FS: 0000000000000000(0000) GS:ffff9dbf3de00000(0000) knlGS:0000000000000000 [1596557.895893] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1596557.895894] CR2: 00007f2af6cd43e0 CR3: 000000153ac10000 CR4: 00000000003607e0 [1596557.895895] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1596557.895896] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1596557.895897] Call Trace: [1596557.895902] [] queued_spin_lock_slowpath+0xb/0xf [1596557.895907] [] _raw_spin_lock+0x20/0x30 [1596557.895944] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1596557.895956] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1596557.895987] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1596557.896002] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1596557.896019] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1596557.896035] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1596557.896051] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1596557.896054] [] ? wake_up_state+0x20/0x20 [1596557.896070] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1596557.896073] [] kthread+0xd1/0xe0 [1596557.896075] [] ? insert_kthread_work+0x40/0x40 [1596557.896078] [] ret_from_fork_nospec_begin+0x7/0x21 [1596557.896080] [] ? insert_kthread_work+0x40/0x40 [1596557.896081] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1596565.812907] NMI watchdog: BUG: soft lockup - CPU#3 stuck for 22s! [ldlm_bl_20:157410] [1596565.813767] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1596565.813807] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1596565.813847] CPU: 3 PID: 157410 Comm: ldlm_bl_20 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1596565.813849] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1596565.813851] task: ffff9dafe91d6180 ti: ffff9db756218000 task.ti: ffff9db756218000 [1596565.813852] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x158/0x200 [1596565.813863] RSP: 0018:ffff9db75621bc70 EFLAGS: 00000202 [1596565.813864] RAX: 0000000000000001 RBX: ffff9db83e3bf340 RCX: 0000000000190000 [1596565.813865] RDX: 0000000000990001 RSI: 0000000000a10001 RDI: ffff9db13d2ec058 [1596565.813866] RBP: ffff9db75621bc70 R08: ffff9daf3e6db780 R09: ffff9dbf3e0db780 [1596565.813867] R10: 0000000000000000 R11: fffff902c1c40d00 R12: ffff9db75621bc38 [1596565.813868] R13: ffff9dbef72eab40 R14: ffff9dbcf820ca28 R15: ffff9dbef72eabd8 [1596565.813869] FS: 0000000000000000(0000) GS:ffff9daf3e6c0000(0000) knlGS:0000000000000000 [1596565.813870] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1596565.813871] CR2: 0000000000cdb000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1596565.813872] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1596565.813873] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1596565.813874] Call Trace: [1596565.813881] [] queued_spin_lock_slowpath+0xb/0xf [1596565.813886] [] _raw_spin_lock+0x20/0x30 [1596565.813919] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1596565.813930] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1596565.813957] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1596565.813971] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1596565.813988] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1596565.814003] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1596565.814019] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1596565.814023] [] ? wake_up_state+0x20/0x20 [1596565.814039] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1596565.814042] [] kthread+0xd1/0xe0 [1596565.814044] [] ? insert_kthread_work+0x40/0x40 [1596565.814047] [] ret_from_fork_nospec_begin+0x7/0x21 [1596565.814049] [] ? insert_kthread_work+0x40/0x40 [1596565.814050] Code: 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 85 c0 74 21 83 f8 03 75 10 eb 1a 66 2e 0f 1f 84 00 00 00 00 00 85 c0 74 0c f3 90 8b 17 <0f> b7 c2 83 f8 03 75 f0 be 01 00 00 00 eb 15 66 0f 1f 84 00 00 [1596565.896906] NMI watchdog: BUG: soft lockup - CPU#13 stuck for 22s! [ldlm_bl_13:157389] [1596565.897788] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1596565.897828] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1596565.897869] CPU: 13 PID: 157389 Comm: ldlm_bl_13 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1596565.897871] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1596565.897872] task: ffff9dba42f85140 ti: ffff9dbf276d4000 task.ti: ffff9dbf276d4000 [1596565.897873] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1596565.897881] RSP: 0018:ffff9dbf276d7c70 EFLAGS: 00000246 [1596565.897882] RAX: 0000000000000000 RBX: ffff9dbf3727aa80 RCX: 0000000000690000 [1596565.897884] RDX: ffff9daf3e6db780 RSI: 0000000000190001 RDI: ffff9db13d2ec058 [1596565.897885] RBP: ffff9dbf276d7c70 R08: ffff9dbf3de5b780 R09: 0000000000000000 [1596565.897886] R10: 0000000000000000 R11: fffff902eb8ef280 R12: ffff9dbf276d7c38 [1596565.897887] R13: ffff9da952e32070 R14: ffff9db31cef1208 R15: ffff9da952e32108 [1596565.897888] FS: 0000000000000000(0000) GS:ffff9dbf3de40000(0000) knlGS:0000000000000000 [1596565.897889] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1596565.897890] CR2: 00007fd93e4c33e0 CR3: 000000153ac10000 CR4: 00000000003607e0 [1596565.897891] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1596565.897892] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1596565.897893] Call Trace: [1596565.897899] [] queued_spin_lock_slowpath+0xb/0xf [1596565.897903] [] _raw_spin_lock+0x20/0x30 [1596565.897938] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1596565.897949] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1596565.897980] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1596565.897994] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1596565.898011] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1596565.898027] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1596565.898043] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1596565.898046] [] ? wake_up_state+0x20/0x20 [1596565.898062] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1596565.898065] [] kthread+0xd1/0xe0 [1596565.898067] [] ? insert_kthread_work+0x40/0x40 [1596565.898070] [] ret_from_fork_nospec_begin+0x7/0x21 [1596565.898071] [] ? insert_kthread_work+0x40/0x40 [1596565.898072] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1596566.855714] LustreError: 11-0: ai400-OST0002-osc-ffff9dbf3796e800: operation ost_read to node 10.0.10.176@o2ib10 failed: rc = -107 [1596566.857264] Lustre: ai400-OST0002-osc-ffff9dbf3796e800: Connection to ai400-OST0002 (at 10.0.10.176@o2ib10) was lost; in progress operations using this service will wait for recovery to complete [1596566.859044] LustreError: 167-0: ai400-OST0002-osc-ffff9dbf3796e800: This client was evicted by ai400-OST0002; in progress operations using this service will fail. [1596569.816934] NMI watchdog: BUG: soft lockup - CPU#5 stuck for 22s! [ldlm_bl_19:157406] [1596569.817790] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1596569.817831] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1596569.817872] CPU: 5 PID: 157406 Comm: ldlm_bl_19 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1596569.817874] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1596569.817876] task: ffff9db9f3ee6180 ti: ffff9db8a9bac000 task.ti: ffff9db8a9bac000 [1596569.817877] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1596569.817886] RSP: 0000:ffff9db8a9bafc70 EFLAGS: 00000246 [1596569.817888] RAX: 0000000000000000 RBX: ffff9dba0c61c640 RCX: 0000000000290000 [1596569.817889] RDX: ffff9dbf3e01b780 RSI: 0000000000a10001 RDI: ffff9db13d2ec058 [1596569.817890] RBP: ffff9db8a9bafc70 R08: ffff9daf3e75b780 R09: 0000000000000000 [1596569.817891] R10: 0000000000000000 R11: fffff902f6511900 R12: ffff9db8a9bafc38 [1596569.817892] R13: ffff9dbc73286c20 R14: ffff9da9f017f7a8 R15: ffff9dbc73286cb8 [1596569.817893] FS: 0000000000000000(0000) GS:ffff9daf3e740000(0000) knlGS:0000000000000000 [1596569.817894] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1596569.817895] CR2: 00000000004053f0 CR3: 000000153ac10000 CR4: 00000000003607e0 [1596569.817897] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1596569.817898] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1596569.817899] Call Trace: [1596569.817905] [] queued_spin_lock_slowpath+0xb/0xf [1596569.817911] [] _raw_spin_lock+0x20/0x30 [1596569.817943] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1596569.817955] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1596569.817983] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1596569.817997] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1596569.818013] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1596569.818029] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1596569.818044] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1596569.818048] [] ? wake_up_state+0x20/0x20 [1596569.818063] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1596569.818067] [] kthread+0xd1/0xe0 [1596569.818070] [] ? insert_kthread_work+0x40/0x40 [1596569.818073] [] ret_from_fork_nospec_begin+0x7/0x21 [1596569.818075] [] ? insert_kthread_work+0x40/0x40 [1596569.818076] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1596577.674466] Lustre: ai400-OST0003-osc-ffff9dbf3796e800: Connection to ai400-OST0003 (at 10.0.10.176@o2ib10) was lost; in progress operations using this service will wait for recovery to complete [1596577.674662] LustreError: 167-0: ai400-OST0003-osc-ffff9dbf3796e800: This client was evicted by ai400-OST0003; in progress operations using this service will fail. [1596577.904991] NMI watchdog: BUG: soft lockup - CPU#16 stuck for 23s! [ldlm_bl_01:156714] [1596577.905956] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1596577.905995] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1596577.906036] CPU: 16 PID: 156714 Comm: ldlm_bl_01 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1596577.906037] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1596577.906039] task: ffff9daf1b2aa080 ti: ffff9dbefea50000 task.ti: ffff9dbefea50000 [1596577.906040] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1596577.906048] RSP: 0018:ffff9dbefea53c70 EFLAGS: 00000246 [1596577.906049] RAX: 0000000000000000 RBX: ffff9dbf28238c80 RCX: 0000000000810000 [1596577.906050] RDX: ffff9daf3e8db780 RSI: 0000000000590001 RDI: ffff9db13d2ec058 [1596577.906051] RBP: ffff9dbefea53c70 R08: ffff9dbf3df1b780 R09: 0000000000000000 [1596577.906052] R10: 0000000000000000 R11: fffff902e3ed9280 R12: ffff9dbefea53c38 [1596577.906053] R13: ffff9dbbc1eb0000 R14: ffff9dbbb2722008 R15: ffff9dbbc1eb0098 [1596577.906055] FS: 0000000000000000(0000) GS:ffff9dbf3df00000(0000) knlGS:0000000000000000 [1596577.906056] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1596577.906057] CR2: 00007f2865b5e3e0 CR3: 000000153ac10000 CR4: 00000000003607e0 [1596577.906059] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1596577.906060] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1596577.906060] Call Trace: [1596577.906066] [] queued_spin_lock_slowpath+0xb/0xf [1596577.906070] [] _raw_spin_lock+0x20/0x30 [1596577.906106] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1596577.906119] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1596577.906149] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1596577.906163] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1596577.906180] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1596577.906196] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1596577.906212] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1596577.906215] [] ? wake_up_state+0x20/0x20 [1596577.906230] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1596577.906233] [] kthread+0xd1/0xe0 [1596577.906235] [] ? insert_kthread_work+0x40/0x40 [1596577.906238] [] ret_from_fork_nospec_begin+0x7/0x21 [1596577.906240] [] ? insert_kthread_work+0x40/0x40 [1596577.906241] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1596581.815018] NMI watchdog: BUG: soft lockup - CPU#4 stuck for 22s! [ldlm_bl_14:157390] [1596581.816101] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1596581.816142] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1596581.816183] CPU: 4 PID: 157390 Comm: ldlm_bl_14 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1596581.816185] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1596581.816187] task: ffff9dba42f84100 ti: ffff9dbf2ee48000 task.ti: ffff9dbf2ee48000 [1596581.816189] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1596581.816199] RSP: 0018:ffff9dbf2ee4bc70 EFLAGS: 00000246 [1596581.816200] RAX: 0000000000000000 RBX: ffff9dbf372797c0 RCX: 0000000000210000 [1596581.816201] RDX: ffff9dbf3dfdb780 RSI: 0000000000990001 RDI: ffff9db13d2ec058 [1596581.816202] RBP: ffff9dbf2ee4bc70 R08: ffff9daf3e71b780 R09: 0000000000000000 [1596581.816203] R10: 0000000000000000 R11: fffff9033c556300 R12: ffff9dbf2ee4bc38 [1596581.816205] R13: ffff9da742a995a0 R14: ffff9dacf4bf30e8 R15: ffff9da742a99638 [1596581.816206] FS: 0000000000000000(0000) GS:ffff9daf3e700000(0000) knlGS:0000000000000000 [1596581.816207] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1596581.816209] CR2: 0000000002968000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1596581.816210] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1596581.816212] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1596581.816213] Call Trace: [1596581.816219] [] queued_spin_lock_slowpath+0xb/0xf [1596581.816225] [] _raw_spin_lock+0x20/0x30 [1596581.816257] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1596581.816268] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1596581.816295] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1596581.816310] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1596581.816327] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1596581.816342] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1596581.816358] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1596581.816361] [] ? wake_up_state+0x20/0x20 [1596581.816376] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1596581.816379] [] kthread+0xd1/0xe0 [1596581.816381] [] ? insert_kthread_work+0x40/0x40 [1596581.816384] [] ret_from_fork_nospec_begin+0x7/0x21 [1596581.816385] [] ? insert_kthread_work+0x40/0x40 [1596581.816386] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1596581.917017] NMI watchdog: BUG: soft lockup - CPU#21 stuck for 22s! [ldlm_bl_16:157400] [1596581.917973] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1596581.918014] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1596581.918054] CPU: 21 PID: 157400 Comm: ldlm_bl_16 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1596581.918056] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1596581.918058] task: ffff9da068908000 ti: ffff9dbf3bc0c000 task.ti: ffff9dbf3bc0c000 [1596581.918059] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1596581.918067] RSP: 0000:ffff9dbf3bc0fc70 EFLAGS: 00000246 [1596581.918068] RAX: 0000000000000000 RBX: ffff9db83e3be6c0 RCX: 0000000000a90000 [1596581.918069] RDX: ffff9daf3e75b780 RSI: 0000000000290001 RDI: ffff9db13d2ec058 [1596581.918070] RBP: ffff9dbf3bc0fc70 R08: ffff9dbf3e05b780 R09: 0000000000000000 [1596581.918071] R10: 0000000000000000 R11: fffff902e6d7f700 R12: ffff9dbf3bc0fc38 [1596581.918072] R13: ffff9db6b7e82b40 R14: ffff9dbbb2723e68 R15: ffff9db6b7e82bd8 [1596581.918073] FS: 0000000000000000(0000) GS:ffff9dbf3e040000(0000) knlGS:0000000000000000 [1596581.918074] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1596581.918076] CR2: 0000000000402f90 CR3: 000000153ac10000 CR4: 00000000003607e0 [1596581.918077] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1596581.918078] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1596581.918079] Call Trace: [1596581.918084] [] queued_spin_lock_slowpath+0xb/0xf [1596581.918089] [] _raw_spin_lock+0x20/0x30 [1596581.918126] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1596581.918139] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1596581.918170] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1596581.918185] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1596581.918201] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1596581.918217] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1596581.918233] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1596581.918237] [] ? wake_up_state+0x20/0x20 [1596581.918252] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1596581.918255] [] kthread+0xd1/0xe0 [1596581.918257] [] ? insert_kthread_work+0x40/0x40 [1596581.918260] [] ret_from_fork_nospec_begin+0x7/0x21 [1596581.918262] [] ? insert_kthread_work+0x40/0x40 [1596581.918263] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1596585.895046] NMI watchdog: BUG: soft lockup - CPU#12 stuck for 22s! [ldlm_bl_17:157401] [1596585.896021] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1596585.896062] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1596585.896103] CPU: 12 PID: 157401 Comm: ldlm_bl_17 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1596585.896104] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1596585.896106] task: ffff9daf382e2080 ti: ffff9dbc3eeec000 task.ti: ffff9dbc3eeec000 [1596585.896107] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x154/0x200 [1596585.896115] RSP: 0018:ffff9dbc3eeefc70 EFLAGS: 00000202 [1596585.896116] RAX: 0000000000000001 RBX: ffff9dbf28239a40 RCX: 0000000000610000 [1596585.896117] RDX: 0000000000690001 RSI: 0000000000410001 RDI: ffff9db13d2ec058 [1596585.896118] RBP: ffff9dbc3eeefc70 R08: ffff9dbf3de1b780 R09: ffff9dbf3e01b780 [1596585.896119] R10: 0000000000000000 R11: ffffffffffffff9c R12: ffff9dbc3eeefc38 [1596585.896120] R13: ffff9da952e30ad0 R14: ffff9db31cef0bd8 R15: ffff9da952e30b68 [1596585.896121] FS: 0000000000000000(0000) GS:ffff9dbf3de00000(0000) knlGS:0000000000000000 [1596585.896122] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1596585.896123] CR2: 00007f2af6cd43e0 CR3: 000000153ac10000 CR4: 00000000003607e0 [1596585.896125] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1596585.896126] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1596585.896127] Call Trace: [1596585.896132] [] queued_spin_lock_slowpath+0xb/0xf [1596585.896136] [] _raw_spin_lock+0x20/0x30 [1596585.896173] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1596585.896185] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1596585.896216] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1596585.896231] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1596585.896247] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1596585.896263] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1596585.896279] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1596585.896283] [] ? wake_up_state+0x20/0x20 [1596585.896299] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1596585.896301] [] kthread+0xd1/0xe0 [1596585.896303] [] ? insert_kthread_work+0x40/0x40 [1596585.896306] [] ret_from_fork_nospec_begin+0x7/0x21 [1596585.896308] [] ? insert_kthread_work+0x40/0x40 [1596585.896309] Code: f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 85 c0 74 21 83 f8 03 75 10 eb 1a 66 2e 0f 1f 84 00 00 00 00 00 85 c0 74 0c 90 8b 17 0f b7 c2 83 f8 03 75 f0 be 01 00 00 00 eb 15 66 0f [1596593.813102] NMI watchdog: BUG: soft lockup - CPU#3 stuck for 22s! [ldlm_bl_20:157410] [1596593.814182] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1596593.814224] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1596593.814266] CPU: 3 PID: 157410 Comm: ldlm_bl_20 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1596593.814267] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1596593.814269] task: ffff9dafe91d6180 ti: ffff9db756218000 task.ti: ffff9db756218000 [1596593.814271] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1596593.814280] RSP: 0018:ffff9db75621bc70 EFLAGS: 00000246 [1596593.814282] RAX: 0000000000000000 RBX: ffff9db83e3bcb40 RCX: 0000000000190000 [1596593.814283] RDX: ffff9dbf3de1b780 RSI: 0000000000610001 RDI: ffff9db13d2ec058 [1596593.814284] RBP: ffff9db75621bc70 R08: ffff9daf3e6db780 R09: 0000000000000000 [1596593.814285] R10: 0000000000000000 R11: fffff902dd0bae80 R12: ffff9db75621bc38 [1596593.814286] R13: ffff9dbef72eab40 R14: ffff9dbcf820ca28 R15: ffff9dbef72eabd8 [1596593.814288] FS: 0000000000000000(0000) GS:ffff9daf3e6c0000(0000) knlGS:0000000000000000 [1596593.814290] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1596593.814291] CR2: 0000000000cdb000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1596593.814292] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1596593.814294] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1596593.814295] Call Trace: [1596593.814302] [] queued_spin_lock_slowpath+0xb/0xf [1596593.814307] [] _raw_spin_lock+0x20/0x30 [1596593.814339] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1596593.814350] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1596593.814375] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1596593.814390] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1596593.814406] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1596593.814422] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1596593.814438] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1596593.814442] [] ? wake_up_state+0x20/0x20 [1596593.814457] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1596593.814460] [] kthread+0xd1/0xe0 [1596593.814462] [] ? insert_kthread_work+0x40/0x40 [1596593.814465] [] ret_from_fork_nospec_begin+0x7/0x21 [1596593.814467] [] ? insert_kthread_work+0x40/0x40 [1596593.814468] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1596593.897102] NMI watchdog: BUG: soft lockup - CPU#13 stuck for 22s! [ldlm_bl_13:157389] [1596593.898185] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1596593.898225] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1596593.898267] CPU: 13 PID: 157389 Comm: ldlm_bl_13 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1596593.898268] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1596593.898270] task: ffff9dba42f85140 ti: ffff9dbf276d4000 task.ti: ffff9dbf276d4000 [1596593.898271] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1596593.898279] RSP: 0018:ffff9dbf276d7c70 EFLAGS: 00000246 [1596593.898280] RAX: 0000000000000000 RBX: ffff9dbf2823b0c0 RCX: 0000000000690000 [1596593.898282] RDX: ffff9daf3e6db780 RSI: 0000000000190001 RDI: ffff9db13d2ec058 [1596593.898283] RBP: ffff9dbf276d7c70 R08: ffff9dbf3de5b780 R09: 0000000000000000 [1596593.898284] R10: 0000000000000000 R11: fffff90320808580 R12: ffff9dbf276d7c38 [1596593.898285] R13: ffff9da952e32070 R14: ffff9db31cef1208 R15: ffff9da952e32108 [1596593.898287] FS: 0000000000000000(0000) GS:ffff9dbf3de40000(0000) knlGS:0000000000000000 [1596593.898288] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1596593.898289] CR2: 00007fd93e4c33e0 CR3: 000000153ac10000 CR4: 00000000003607e0 [1596593.898291] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1596593.898292] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1596593.898293] Call Trace: [1596593.898298] [] queued_spin_lock_slowpath+0xb/0xf [1596593.898303] [] _raw_spin_lock+0x20/0x30 [1596593.898340] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1596593.898352] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1596593.898383] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1596593.898397] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1596593.898414] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1596593.898430] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1596593.898446] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1596593.898449] [] ? wake_up_state+0x20/0x20 [1596593.898464] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1596593.898467] [] kthread+0xd1/0xe0 [1596593.898469] [] ? insert_kthread_work+0x40/0x40 [1596593.898472] [] ret_from_fork_nospec_begin+0x7/0x21 [1596593.898474] [] ? insert_kthread_work+0x40/0x40 [1596593.898475] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1596599.678642] LustreError: 158098:0:(ldlm_resource.c:1147:ldlm_resource_complain()) ai400-OST0002-osc-ffff9dbf3796e800: namespace resource [0x10:0x0:0x0].0x0 (ffff9daf1caae840) refcount nonzero (1760) after lock cleanup; forcing cleanup. [1596599.680596] Lustre: ai400-OST0002-osc-ffff9dbf3796e800: Connection restored to 10.0.10.176@o2ib10 (at 10.0.10.176@o2ib10) [1596601.524960] LustreError: 158108:0:(ldlm_resource.c:1147:ldlm_resource_complain()) ai400-OST0003-osc-ffff9dbf3796e800: namespace resource [0x10:0x0:0x0].0x0 (ffff9db6e2f50b40) refcount nonzero (2193) after lock cleanup; forcing cleanup. [1596601.526773] LustreError: 158108:0:(ldlm_resource.c:1147:ldlm_resource_complain()) Skipped 59 previous similar messages [1596601.527940] Lustre: ai400-OST0003-osc-ffff9dbf3796e800: Connection restored to 10.0.10.176@o2ib10 (at 10.0.10.176@o2ib10) [1646711.917579] Lustre: Unmounted ai400-client [1646717.592942] LNet: Removed LNI 10.0.13.150@o2ib10 [1646927.223273] LNet: HW NUMA nodes: 2, HW CPU cores: 24, npartitions: 2 [1646927.224651] alg: No test for adler32 (adler32-zlib) [1646928.016418] Lustre: Lustre: Build Version: 2.12.58_146_g41bc5f0 [1646928.080637] LNet: 7286:0:(config.c:1641:lnet_inet_enumerate()) lnet: Ignoring interface dummy0: it's down [1646928.080657] LNet: Using FMR for registration [1646928.089927] LNet: Added LNI 10.0.13.150@o2ib10 [8/256/0/180] [1646946.625798] Lustre: Mounted ai400-client [1647589.598478] NMI watchdog: BUG: soft lockup - CPU#8 stuck for 22s! [ldlm_bl_02:7514] [1647589.599652] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1647589.599695] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1647589.599736] CPU: 8 PID: 7514 Comm: ldlm_bl_02 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1647589.599738] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1647589.599740] task: ffff9dba42f82080 ti: ffff9dbf36d84000 task.ti: ffff9dbf36d84000 [1647589.599742] RIP: 0010:[] [] __write_lock_failed+0x7/0x20 [1647589.599750] RSP: 0000:ffff9dbf36d87c80 EFLAGS: 00000297 [1647589.599751] RAX: ffff9dbf2ca04068 RBX: 0000000000000100 RCX: ffff9dbf2ca04040 [1647589.599753] RDX: 0000000000000001 RSI: ffff9dba04740110 RDI: ffff9dbf2ca0405c [1647589.599754] RBP: ffff9dbf36d87c80 R08: ffff9da8e7d8b270 R09: 0000000000000001 [1647589.599755] R10: 0000000000000000 R11: fffff9032bd8f080 R12: ffff9dbcd9bb6080 [1647589.599756] R13: ffffffffc12cc36f R14: ffff9dbf36d87c38 R15: ffff9dae905f95a0 [1647589.599758] FS: 0000000000000000(0000) GS:ffff9daf3e800000(0000) knlGS:0000000000000000 [1647589.599759] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1647589.599760] CR2: 00007f57c0025008 CR3: 00000002db6a0000 CR4: 00000000003607e0 [1647589.599762] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1647589.599763] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1647589.599764] Call Trace: [1647589.599772] [] _raw_write_lock+0x17/0x20 [1647589.599809] [] cl_object_attr_write_lock+0x1a/0x20 [obdclass] [1647589.599820] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1647589.599848] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647589.599863] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647589.599879] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647589.599895] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647589.599912] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647589.599916] [] ? wake_up_state+0x20/0x20 [1647589.599932] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647589.599935] [] kthread+0xd1/0xe0 [1647589.599937] [] ? insert_kthread_work+0x40/0x40 [1647589.599940] [] ret_from_fork_nospec_begin+0x7/0x21 [1647589.599942] [] ? insert_kthread_work+0x40/0x40 [1647589.599943] Code: 00 00 00 00 0f 1f 44 00 00 41 ff e7 e8 07 00 00 00 f3 90 0f ae e8 eb f9 4c 89 3c 24 c3 90 90 90 90 90 90 90 55 48 89 e5 f0 ff 07 90 83 3f 01 75 f9 f0 ff 0f 75 f1 5d c3 90 66 2e 0f 1f 84 00 [1647589.681479] NMI watchdog: BUG: soft lockup - CPU#17 stuck for 22s! [ldlm_bl_04:7748] [1647589.682508] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1647589.682549] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1647589.682590] CPU: 17 PID: 7748 Comm: ldlm_bl_04 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1647589.682591] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1647589.682593] task: ffff9dae5622d140 ti: ffff9dbf38738000 task.ti: ffff9dbf38738000 [1647589.682595] RIP: 0010:[] [] __write_lock_failed+0x11/0x20 [1647589.682601] RSP: 0000:ffff9dbf3873bc80 EFLAGS: 00000282 [1647589.682602] RAX: ffff9dbf2ca04068 RBX: 0000000000000220 RCX: ffff9dbf2ca04040 [1647589.682603] RDX: 0000000000000001 RSI: ffff9dba04740230 RDI: ffff9dbf2ca0405c [1647589.682604] RBP: ffff9dbf3873bc80 R08: ffff9dbf39f58850 R09: 0000000000000001 [1647589.682605] R10: 0000000000000000 R11: fffff902e3d8a380 R12: ffff9dbcd9bb5180 [1647589.682606] R13: ffffffffc12cc36f R14: ffff9dbf3873bc38 R15: ffff9dbb70b8ab40 [1647589.682608] FS: 0000000000000000(0000) GS:ffff9dbf3df40000(0000) knlGS:0000000000000000 [1647589.682609] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1647589.682610] CR2: 0000000000d32b00 CR3: 000000153ac10000 CR4: 00000000003607e0 [1647589.682611] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1647589.682612] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1647589.682613] Call Trace: [1647589.682619] [] _raw_write_lock+0x17/0x20 [1647589.682651] [] cl_object_attr_write_lock+0x1a/0x20 [obdclass] [1647589.682663] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1647589.682691] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647589.682706] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647589.682722] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647589.682738] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647589.682754] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647589.682757] [] ? wake_up_state+0x20/0x20 [1647589.682773] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647589.682776] [] kthread+0xd1/0xe0 [1647589.682778] [] ? insert_kthread_work+0x40/0x40 [1647589.682781] [] ret_from_fork_nospec_begin+0x7/0x21 [1647589.682782] [] ? insert_kthread_work+0x40/0x40 [1647589.682784] Code: ff e7 e8 07 00 00 00 f3 90 0f ae e8 eb f9 4c 89 3c 24 c3 90 90 90 90 90 90 90 55 48 89 e5 f0 ff 07 f3 90 83 3f 01 75 f9 f0 ff 0f <75> f1 5d c3 90 66 2e 0f 1f 84 00 00 00 00 00 55 48 89 e5 f0 48 [1647597.582691] NMI watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [ldlm_bl_07:7754] [1647597.583693] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1647597.583735] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1647597.583775] CPU: 1 PID: 7754 Comm: ldlm_bl_07 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1647597.583777] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1647597.583779] task: ffff9dad66f3a080 ti: ffff9dac0b858000 task.ti: ffff9dac0b858000 [1647597.583780] RIP: 0010:[] [] __write_lock_failed+0x9/0x20 [1647597.583788] RSP: 0000:ffff9dac0b85bc80 EFLAGS: 00000297 [1647597.583789] RAX: ffff9dbf2ca04068 RBX: 0000000000000020 RCX: ffff9dbf2ca04040 [1647597.583790] RDX: 0000000000000001 RSI: ffff9dba04740030 RDI: ffff9dbf2ca0405c [1647597.583791] RBP: ffff9dac0b85bc80 R08: ffff9da8e7d8bdb0 R09: 0000000000000001 [1647597.583792] R10: 0000000000000000 R11: fffff902d025e880 R12: ffff9dbef5b0a440 [1647597.583793] R13: ffffffffc12cc36f R14: ffff9dac0b85bc38 R15: ffff9dbf32acb610 [1647597.583795] FS: 0000000000000000(0000) GS:ffff9daf3e640000(0000) knlGS:0000000000000000 [1647597.583796] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1647597.583797] CR2: 000055e87d4500c0 CR3: 000000153ac10000 CR4: 00000000003607e0 [1647597.583799] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1647597.583800] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1647597.583801] Call Trace: [1647597.583808] [] _raw_write_lock+0x17/0x20 [1647597.583847] [] cl_object_attr_write_lock+0x1a/0x20 [obdclass] [1647597.583859] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1647597.583888] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647597.583903] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647597.583919] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647597.583935] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647597.583951] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647597.583955] [] ? wake_up_state+0x20/0x20 [1647597.583970] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647597.583974] [] kthread+0xd1/0xe0 [1647597.583976] [] ? insert_kthread_work+0x40/0x40 [1647597.583979] [] ret_from_fork_nospec_begin+0x7/0x21 [1647597.583981] [] ? insert_kthread_work+0x40/0x40 [1647597.583982] Code: 00 00 0f 1f 44 00 00 41 ff e7 e8 07 00 00 00 f3 90 0f ae e8 eb f9 4c 89 3c 24 c3 90 90 90 90 90 90 90 55 48 89 e5 f0 ff 07 f3 90 <83> 3f 01 75 f9 f0 ff 0f 75 f1 5d c3 90 66 2e 0f 1f 84 00 00 00 [1647601.591759] NMI watchdog: BUG: soft lockup - CPU#5 stuck for 22s! [ldlm_bl_06:7752] [1647601.592752] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1647601.592795] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1647601.592835] CPU: 5 PID: 7752 Comm: ldlm_bl_06 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1647601.592837] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1647601.592839] task: ffff9dad66f3e180 ti: ffff9daf1920c000 task.ti: ffff9daf1920c000 [1647601.592840] RIP: 0010:[] [] __write_lock_failed+0x9/0x20 [1647601.592848] RSP: 0000:ffff9daf1920fc80 EFLAGS: 00000297 [1647601.592849] RAX: ffff9dbf2ca04068 RBX: 00000000000000a0 RCX: ffff9dbf2ca04040 [1647601.592850] RDX: 0000000000000001 RSI: ffff9dba047400b0 RDI: ffff9dbf2ca0405c [1647601.592851] RBP: ffff9daf1920fc80 R08: ffff9dbaccf584f0 R09: 0000000000000001 [1647601.592852] R10: 0000000000000000 R11: fffff902cb286e00 R12: ffff9dbef5b08140 [1647601.592853] R13: ffffffffc12cc36f R14: ffff9daf1920fc38 R15: ffff9dae905b2b40 [1647601.592855] FS: 0000000000000000(0000) GS:ffff9daf3e740000(0000) knlGS:0000000000000000 [1647601.592856] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1647601.592857] CR2: 00007ff17d353f94 CR3: 0000000e443fe000 CR4: 00000000003607e0 [1647601.592858] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1647601.592859] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1647601.592860] Call Trace: [1647601.592867] [] _raw_write_lock+0x17/0x20 [1647601.592905] [] cl_object_attr_write_lock+0x1a/0x20 [obdclass] [1647601.592915] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1647601.592942] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647601.592956] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647601.592972] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647601.592987] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647601.593002] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647601.593006] [] ? wake_up_state+0x20/0x20 [1647601.593020] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647601.593023] [] kthread+0xd1/0xe0 [1647601.593025] [] ? insert_kthread_work+0x40/0x40 [1647601.593028] [] ret_from_fork_nospec_begin+0x7/0x21 [1647601.593030] [] ? insert_kthread_work+0x40/0x40 [1647601.593031] Code: 00 00 0f 1f 44 00 00 41 ff e7 e8 07 00 00 00 f3 90 0f ae e8 eb f9 4c 89 3c 24 c3 90 90 90 90 90 90 90 55 48 89 e5 f0 ff 07 f3 90 <83> 3f 01 75 f9 f0 ff 0f 75 f1 5d c3 90 66 2e 0f 1f 84 00 00 00 [1647601.694759] NMI watchdog: BUG: soft lockup - CPU#22 stuck for 22s! [ldlm_bl_23:8328] [1647601.695807] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1647601.695848] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1647601.695889] CPU: 22 PID: 8328 Comm: ldlm_bl_23 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1647601.695891] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1647601.695893] task: ffff9dad51255140 ti: ffff9d9f47420000 task.ti: ffff9d9f47420000 [1647601.695894] RIP: 0010:[] [] __write_lock_failed+0x9/0x20 [1647601.695901] RSP: 0000:ffff9d9f47423c80 EFLAGS: 00000297 [1647601.695902] RAX: ffff9dbf2ca04068 RBX: 00000000000002c0 RCX: ffff9dbf2ca04040 [1647601.695903] RDX: 0000000000000001 RSI: ffff9dba047402d0 RDI: ffff9dbf2ca0405c [1647601.695904] RBP: ffff9d9f47423c80 R08: ffff9dbf37aaca90 R09: 0000000000000001 [1647601.695905] R10: 0000000000000000 R11: fffff902c0568f80 R12: ffff9dbf0f641b80 [1647601.695906] R13: ffffffffc12cc36f R14: ffff9d9f47423c38 R15: ffff9dbf37d2b610 [1647601.695907] FS: 0000000000000000(0000) GS:ffff9dbf3e080000(0000) knlGS:0000000000000000 [1647601.695909] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1647601.695910] CR2: 00007fdbb00131a8 CR3: 000000153ac10000 CR4: 00000000003607e0 [1647601.695911] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1647601.695912] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1647601.695913] Call Trace: [1647601.695919] [] _raw_write_lock+0x17/0x20 [1647601.695951] [] cl_object_attr_write_lock+0x1a/0x20 [obdclass] [1647601.695964] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1647601.695991] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647601.696006] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647601.696022] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647601.696038] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647601.696054] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647601.696057] [] ? wake_up_state+0x20/0x20 [1647601.696073] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647601.696076] [] kthread+0xd1/0xe0 [1647601.696078] [] ? insert_kthread_work+0x40/0x40 [1647601.696081] [] ret_from_fork_nospec_begin+0x7/0x21 [1647601.696083] [] ? insert_kthread_work+0x40/0x40 [1647601.696084] Code: 00 00 0f 1f 44 00 00 41 ff e7 e8 07 00 00 00 f3 90 0f ae e8 eb f9 4c 89 3c 24 c3 90 90 90 90 90 90 90 55 48 89 e5 f0 ff 07 f3 90 <83> 3f 01 75 f9 f0 ff 0f 75 f1 5d c3 90 66 2e 0f 1f 84 00 00 00 [1647617.598889] NMI watchdog: BUG: soft lockup - CPU#8 stuck for 22s! [ldlm_bl_02:7514] [1647617.599891] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1647617.599932] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1647617.599973] CPU: 8 PID: 7514 Comm: ldlm_bl_02 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1647617.599974] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1647617.599977] task: ffff9dba42f82080 ti: ffff9dbf36d84000 task.ti: ffff9dbf36d84000 [1647617.599978] RIP: 0010:[] [] __write_lock_failed+0x7/0x20 [1647617.599986] RSP: 0000:ffff9dbf36d87c80 EFLAGS: 00000297 [1647617.599988] RAX: ffff9dbf2ca04068 RBX: 0000000000000100 RCX: ffff9dbf2ca04040 [1647617.599989] RDX: 0000000000000001 RSI: ffff9dba04740110 RDI: ffff9dbf2ca0405c [1647617.599990] RBP: ffff9dbf36d87c80 R08: ffff9da8e7d8b270 R09: 0000000000000001 [1647617.599991] R10: 0000000000000000 R11: fffff902ea1bbe00 R12: ffff9dbcd9bb4c80 [1647617.599992] R13: ffffffffc12cc36f R14: ffff9dbf36d87c38 R15: ffff9dae905f95a0 [1647617.599993] FS: 0000000000000000(0000) GS:ffff9daf3e800000(0000) knlGS:0000000000000000 [1647617.599994] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1647617.599996] CR2: 00007f57c0025008 CR3: 000000153ac10000 CR4: 00000000003607e0 [1647617.599997] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1647617.599998] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1647617.599999] Call Trace: [1647617.600006] [] _raw_write_lock+0x17/0x20 [1647617.600046] [] cl_object_attr_write_lock+0x1a/0x20 [obdclass] [1647617.600057] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1647617.600086] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647617.600100] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647617.600116] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647617.600131] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647617.600146] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647617.600150] [] ? wake_up_state+0x20/0x20 [1647617.600164] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647617.600167] [] kthread+0xd1/0xe0 [1647617.600169] [] ? insert_kthread_work+0x40/0x40 [1647617.600172] [] ret_from_fork_nospec_begin+0x7/0x21 [1647617.600174] [] ? insert_kthread_work+0x40/0x40 [1647617.600175] Code: 00 00 00 00 0f 1f 44 00 00 41 ff e7 e8 07 00 00 00 f3 90 0f ae e8 eb f9 4c 89 3c 24 c3 90 90 90 90 90 90 90 55 48 89 e5 f0 ff 07 90 83 3f 01 75 f9 f0 ff 0f 75 f1 5d c3 90 66 2e 0f 1f 84 00 [1647617.681889] NMI watchdog: BUG: soft lockup - CPU#17 stuck for 22s! [ldlm_bl_04:7748] [1647617.682923] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1647617.682963] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1647617.683004] CPU: 17 PID: 7748 Comm: ldlm_bl_04 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1647617.683006] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1647617.683008] task: ffff9dae5622d140 ti: ffff9dbf38738000 task.ti: ffff9dbf38738000 [1647617.683009] RIP: 0010:[] [] __write_lock_failed+0x9/0x20 [1647617.683016] RSP: 0000:ffff9dbf3873bc80 EFLAGS: 00000297 [1647617.683017] RAX: ffff9dbf2ca04068 RBX: 0000000000000220 RCX: ffff9dbf2ca04040 [1647617.683018] RDX: 0000000000000001 RSI: ffff9dba04740230 RDI: ffff9dbf2ca0405c [1647617.683019] RBP: ffff9dbf3873bc80 R08: ffff9dbf39f58850 R09: 0000000000000001 [1647617.683020] R10: 0000000000000000 R11: fffff902e974aa80 R12: ffff9db75ea57340 [1647617.683021] R13: ffffffffc12cc36f R14: ffff9dbf3873bc38 R15: ffff9dbb70b8ab40 [1647617.683022] FS: 0000000000000000(0000) GS:ffff9dbf3df40000(0000) knlGS:0000000000000000 [1647617.683023] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1647617.683024] CR2: 0000000000d32b00 CR3: 000000153ac10000 CR4: 00000000003607e0 [1647617.683026] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1647617.683027] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1647617.683028] Call Trace: [1647617.683033] [] _raw_write_lock+0x17/0x20 [1647617.683065] [] cl_object_attr_write_lock+0x1a/0x20 [obdclass] [1647617.683077] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1647617.683103] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647617.683119] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647617.683134] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647617.683150] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647617.683166] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647617.683169] [] ? wake_up_state+0x20/0x20 [1647617.683185] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647617.683188] [] kthread+0xd1/0xe0 [1647617.683190] [] ? insert_kthread_work+0x40/0x40 [1647617.683193] [] ret_from_fork_nospec_begin+0x7/0x21 [1647617.683195] [] ? insert_kthread_work+0x40/0x40 [1647617.683196] Code: 00 00 0f 1f 44 00 00 41 ff e7 e8 07 00 00 00 f3 90 0f ae e8 eb f9 4c 89 3c 24 c3 90 90 90 90 90 90 90 55 48 89 e5 f0 ff 07 f3 90 <83> 3f 01 75 f9 f0 ff 0f 75 f1 5d c3 90 66 2e 0f 1f 84 00 00 00 [1647625.582955] NMI watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [ldlm_bl_07:7754] [1647625.584130] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1647625.584174] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1647625.584214] CPU: 1 PID: 7754 Comm: ldlm_bl_07 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1647625.584216] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1647625.584219] task: ffff9dad66f3a080 ti: ffff9dac0b858000 task.ti: ffff9dac0b858000 [1647625.584220] RIP: 0010:[] [] ldlm_kms_shift_cb+0x7b/0x220 [ptlrpc] [1647625.584261] RSP: 0000:ffff9dac0b85bc18 EFLAGS: 00000246 [1647625.584262] RAX: ffff9dae8f607740 RBX: ffff9dbf32acb6c0 RCX: 0000000000000000 [1647625.584263] RDX: ffff9dae8f607948 RSI: 0000040000000000 RDI: ffff9db6d2e7af80 [1647625.584264] RBP: ffff9dac0b85bc28 R08: 0000000000000000 R09: 0000000000000001 [1647625.584265] R10: 0000000000000000 R11: fffff902e524c680 R12: ffff9dbeceb4a8e0 [1647625.584267] R13: 00000000a8b553f1 R14: 00000000c0d40892 R15: ffff9dac0b85bba8 [1647625.584268] FS: 0000000000000000(0000) GS:ffff9daf3e640000(0000) knlGS:0000000000000000 [1647625.584269] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1647625.584271] CR2: 000055e87d4500c0 CR3: 000000153ac10000 CR4: 00000000003607e0 [1647625.584272] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1647625.584273] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1647625.584274] Call Trace: [1647625.584292] [] ? ldlm_extent_shift_kms+0x1b0/0x1b0 [ptlrpc] [1647625.584311] [] interval_iterate_reverse+0x53/0x270 [ptlrpc] [1647625.584326] [] ldlm_extent_shift_kms+0xa2/0x1b0 [ptlrpc] [1647625.584337] [] osc_ldlm_blocking_ast+0x306/0x3a0 [osc] [1647625.584351] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647625.584382] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647625.584397] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647625.584412] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647625.584428] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647625.584433] [] ? wake_up_state+0x20/0x20 [1647625.584448] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647625.584451] [] kthread+0xd1/0xe0 [1647625.584453] [] ? insert_kthread_work+0x40/0x40 [1647625.584458] [] ret_from_fork_nospec_begin+0x7/0x21 [1647625.584460] [] ? insert_kthread_work+0x40/0x40 [1647625.584461] Code: 00 00 48 85 b2 f0 fe ff ff 74 20 48 8b 90 08 02 00 00 49 39 d4 48 8d 82 f8 fd ff ff 0f 84 96 00 00 00 48 85 b2 f0 fe ff ff 75 e0 <48> 85 c0 0f 84 84 00 00 00 48 8b 90 d0 00 00 00 48 8b 33 48 39 [1647629.591987] NMI watchdog: BUG: soft lockup - CPU#5 stuck for 22s! [ldlm_bl_06:7752] [1647629.593162] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1647629.593203] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1647629.593244] CPU: 5 PID: 7752 Comm: ldlm_bl_06 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1647629.593245] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1647629.593247] task: ffff9dad66f3e180 ti: ffff9daf1920c000 task.ti: ffff9daf1920c000 [1647629.593248] RIP: 0010:[] [] ldlm_kms_shift_cb+0x7b/0x220 [ptlrpc] [1647629.593287] RSP: 0000:ffff9daf1920fc18 EFLAGS: 00000246 [1647629.593288] RAX: ffff9db97cf85c40 RBX: ffff9dae905b2bf0 RCX: 0000000000000000 [1647629.593289] RDX: ffff9db97cf85e48 RSI: 0000040000000000 RDI: ffff9dba8623d200 [1647629.593290] RBP: ffff9daf1920fc28 R08: 0000000000000000 R09: 0000000000000001 [1647629.593291] R10: 0000000000000000 R11: fffff902f939dd00 R12: ffff9da5e5fafbe8 [1647629.593292] R13: 00000000bcd04b77 R14: 00000000c0d40892 R15: ffff9daf1920fba8 [1647629.593294] FS: 0000000000000000(0000) GS:ffff9daf3e740000(0000) knlGS:0000000000000000 [1647629.593295] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1647629.593296] CR2: 00007ff17d353f94 CR3: 0000000e443fe000 CR4: 00000000003607e0 [1647629.593297] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1647629.593298] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1647629.593299] Call Trace: [1647629.593316] [] ? ldlm_extent_shift_kms+0x1b0/0x1b0 [ptlrpc] [1647629.593334] [] interval_iterate_reverse+0x53/0x270 [ptlrpc] [1647629.593348] [] ldlm_extent_shift_kms+0xa2/0x1b0 [ptlrpc] [1647629.593358] [] osc_ldlm_blocking_ast+0x306/0x3a0 [osc] [1647629.593371] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647629.593401] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647629.593415] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647629.593429] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647629.593443] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647629.593448] [] ? wake_up_state+0x20/0x20 [1647629.593462] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647629.593464] [] kthread+0xd1/0xe0 [1647629.593466] [] ? insert_kthread_work+0x40/0x40 [1647629.593471] [] ret_from_fork_nospec_begin+0x7/0x21 [1647629.593473] [] ? insert_kthread_work+0x40/0x40 [1647629.593474] Code: 00 00 48 85 b2 f0 fe ff ff 74 20 48 8b 90 08 02 00 00 49 39 d4 48 8d 82 f8 fd ff ff 0f 84 96 00 00 00 48 85 b2 f0 fe ff ff 75 e0 <48> 85 c0 0f 84 84 00 00 00 48 8b 90 d0 00 00 00 48 8b 33 48 39 [1647631.923004] INFO: rcu_sched self-detected stall on CPU { 8} (t=60000 jiffies g=21655874 c=21655873 q=107936) [1647631.924013] INFO: rcu_sched self-detected stall on CPU { 17} (t=60001 jiffies g=21655874 c=21655873 q=107936) [1647631.924014] Task dump for CPU 8: [1647631.924016] ldlm_bl_02 R running task 0 7514 2 0x00000088 [1647631.924017] Call Trace: [1647631.924024] [] ? __schedule+0x13a/0x860 [1647631.924028] [] ? from_kgid+0x12/0x20 [1647631.924067] [] ? get_my_ctx+0x69/0x140 [ptlrpc] [1647631.924089] [] ? sptlrpc_import_check_ctx+0x1ed/0x3b0 [ptlrpc] [1647631.924102] [] ? ldlm_lock_match_with_skip+0x3a8/0x860 [ptlrpc] [1647631.924105] [] ? call_rcu_sched+0x1d/0x20 [1647631.924108] [] ? __radix_tree_delete_node+0x4f/0x170 [1647631.924111] [] ? free_pcppages_bulk+0x17e/0x3a0 [1647631.924113] [] ? radix_tree_next_chunk+0x116/0x2d0 [1647631.924115] [] ? radix_tree_gang_lookup+0xcd/0x150 [1647631.924142] [] ? cl2vvp_io+0x1d/0x90 [lustre] [1647631.924153] [] ? vvp_io_fini+0x34/0x6b0 [lustre] [1647631.924167] [] ? ldlm_kms_shift_cb+0x7b/0x220 [ptlrpc] [1647631.924181] [] ? ldlm_extent_shift_kms+0x1b0/0x1b0 [ptlrpc] [1647631.924198] [] ? interval_iterate_reverse+0x53/0x270 [ptlrpc] [1647631.924211] [] ? ldlm_extent_shift_kms+0xa2/0x1b0 [ptlrpc] [1647631.924223] [] ? osc_ldlm_blocking_ast+0x306/0x3a0 [osc] [1647631.924235] [] ? ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647631.924261] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647631.924275] [] ? ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647631.924290] [] ? ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647631.924305] [] ? ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647631.924308] [] ? wake_up_state+0x20/0x20 [1647631.924323] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647631.924325] [] ? kthread+0xd1/0xe0 [1647631.924327] [] ? insert_kthread_work+0x40/0x40 [1647631.924329] [] ? ret_from_fork_nospec_begin+0x7/0x21 [1647631.924331] [] ? insert_kthread_work+0x40/0x40 [1647631.924331] Task dump for CPU 17: [1647631.924333] ldlm_bl_04 R running task 0 7748 2 0x00000088 [1647631.924333] Call Trace: [1647631.924336] [] sched_show_task+0xa8/0x110 [1647631.924338] [] dump_cpu_task+0x39/0x70 [1647631.924340] [] rcu_dump_cpu_stacks+0x90/0xd0 [1647631.924341] [] rcu_check_callbacks+0x442/0x730 [1647631.924345] [] ? tick_sched_do_timer+0x50/0x50 [1647631.924348] [] update_process_times+0x46/0x80 [1647631.924349] [] tick_sched_handle+0x30/0x70 [1647631.924351] [] tick_sched_timer+0x39/0x80 [1647631.924354] [] __hrtimer_run_queues+0xf3/0x270 [1647631.924356] [] hrtimer_interrupt+0xaf/0x1d0 [1647631.924360] [] local_apic_timer_interrupt+0x3b/0x60 [1647631.924363] [] smp_apic_timer_interrupt+0x43/0x60 [1647631.924365] [] apic_timer_interrupt+0x162/0x170 [1647631.924367] [] ? __write_lock_failed+0x7/0x20 [1647631.924370] [] _raw_write_lock+0x17/0x20 [1647631.924387] [] cl_object_attr_write_lock+0x1a/0x20 [obdclass] [1647631.924393] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1647631.924406] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647631.924420] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647631.924434] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647631.924449] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647631.924464] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647631.924466] [] ? wake_up_state+0x20/0x20 [1647631.924481] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647631.924483] [] kthread+0xd1/0xe0 [1647631.924484] [] ? insert_kthread_work+0x40/0x40 [1647631.924486] [] ret_from_fork_nospec_begin+0x7/0x21 [1647631.924488] [] ? insert_kthread_work+0x40/0x40 [1647631.925013] Task dump for CPU 8: [1647631.925015] ldlm_bl_02 R running task 0 7514 2 0x00000088 [1647631.925018] Call Trace: [1647631.925020] [] sched_show_task+0xa8/0x110 [1647631.925029] [] dump_cpu_task+0x39/0x70 [1647631.925033] [] rcu_dump_cpu_stacks+0x90/0xd0 [1647631.925036] [] rcu_check_callbacks+0x442/0x730 [1647631.925041] [] ? tick_sched_do_timer+0x50/0x50 [1647631.925045] [] update_process_times+0x46/0x80 [1647631.925047] [] tick_sched_handle+0x30/0x70 [1647631.925049] [] tick_sched_timer+0x39/0x80 [1647631.925055] [] __hrtimer_run_queues+0xf3/0x270 [1647631.925057] [] hrtimer_interrupt+0xaf/0x1d0 [1647631.925063] [] local_apic_timer_interrupt+0x3b/0x60 [1647631.925068] [] smp_apic_timer_interrupt+0x43/0x60 [1647631.925072] [] apic_timer_interrupt+0x162/0x170 [1647631.925073] [] ? ldlm_kms_shift_cb+0x7b/0x220 [ptlrpc] [1647631.925127] [] ? ldlm_extent_shift_kms+0x1b0/0x1b0 [ptlrpc] [1647631.925145] [] interval_iterate_reverse+0x53/0x270 [ptlrpc] [1647631.925160] [] ldlm_extent_shift_kms+0xa2/0x1b0 [ptlrpc] [1647631.925170] [] osc_ldlm_blocking_ast+0x306/0x3a0 [osc] [1647631.925184] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647631.925214] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647631.925229] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647631.925245] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647631.925260] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647631.925263] [] ? wake_up_state+0x20/0x20 [1647631.925279] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647631.925281] [] kthread+0xd1/0xe0 [1647631.925284] [] ? insert_kthread_work+0x40/0x40 [1647631.925286] [] ret_from_fork_nospec_begin+0x7/0x21 [1647631.925288] [] ? insert_kthread_work+0x40/0x40 [1647631.925290] Task dump for CPU 17: [1647631.925291] ldlm_bl_04 R running task 0 7748 2 0x00000088 [1647631.925293] Call Trace: [1647631.925297] [] ? __schedule+0x42a/0x860 [1647631.925302] [] ? from_kgid+0x12/0x20 [1647631.925325] [] ? get_my_ctx+0x69/0x140 [ptlrpc] [1647631.925346] [] ? sptlrpc_import_check_ctx+0x1ed/0x3b0 [ptlrpc] [1647631.925360] [] ? ldlm_lock_match_with_skip+0x3a8/0x860 [ptlrpc] [1647631.925364] [] ? intel_pstate_update_util+0x17b/0x310 [1647631.925368] [] ? __slab_free+0x81/0x2f0 [1647631.925372] [] ? radix_tree_descend+0x1a/0x60 [1647631.925374] [] ? radix_tree_next_chunk+0x116/0x2d0 [1647631.925376] [] ? radix_tree_gang_lookup+0xcd/0x150 [1647631.925402] [] ? cl2vvp_io+0x1d/0x90 [lustre] [1647631.925414] [] ? vvp_io_fini+0x34/0x6b0 [lustre] [1647631.925433] [] ? cl_io_fini+0x78/0x250 [obdclass] [1647631.925441] [] ? osc_lock_discard_pages+0x11f/0x2ef [osc] [1647631.925444] [] ? __write_lock_failed+0xc/0x20 [1647631.925446] [] ? _raw_write_lock+0x17/0x20 [1647631.925462] [] ? cl_object_attr_write_lock+0x1a/0x20 [obdclass] [1647631.925469] [] ? osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1647631.925482] [] ? ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647631.925495] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647631.925510] [] ? ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647631.925525] [] ? ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647631.925541] [] ? ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647631.925543] [] ? wake_up_state+0x20/0x20 [1647631.925559] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647631.925561] [] ? kthread+0xd1/0xe0 [1647631.925563] [] ? insert_kthread_work+0x40/0x40 [1647631.925565] [] ? ret_from_fork_nospec_begin+0x7/0x21 [1647631.925567] [] ? insert_kthread_work+0x40/0x40 [1647649.695151] NMI watchdog: BUG: soft lockup - CPU#22 stuck for 21s! [ldlm_bl_23:8328] [1647649.696320] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1647649.696361] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1647649.696402] CPU: 22 PID: 8328 Comm: ldlm_bl_23 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1647649.696404] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1647649.696406] task: ffff9dad51255140 ti: ffff9d9f47420000 task.ti: ffff9d9f47420000 [1647649.696407] RIP: 0010:[] [] __write_lock_failed+0x9/0x20 [1647649.696414] RSP: 0000:ffff9d9f47423c80 EFLAGS: 00000297 [1647649.696416] RAX: ffff9dbf2ca04068 RBX: 00000000000002c0 RCX: ffff9dbf2ca04040 [1647649.696417] RDX: 0000000000000001 RSI: ffff9dba047402d0 RDI: ffff9dbf2ca0405c [1647649.696418] RBP: ffff9d9f47423c80 R08: ffff9dbf37aaca90 R09: 0000000000000001 [1647649.696419] R10: 0000000000000000 R11: fffff902f7fee100 R12: ffff9db75ea521c0 [1647649.696420] R13: ffffffffc12cc36f R14: ffff9d9f47423c38 R15: ffff9dbf37d2b610 [1647649.696421] FS: 0000000000000000(0000) GS:ffff9dbf3e080000(0000) knlGS:0000000000000000 [1647649.696422] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1647649.696423] CR2: 00007fdbb00131a8 CR3: 000000153ac10000 CR4: 00000000003607e0 [1647649.696425] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1647649.696426] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1647649.696427] Call Trace: [1647649.696433] [] _raw_write_lock+0x17/0x20 [1647649.696464] [] cl_object_attr_write_lock+0x1a/0x20 [obdclass] [1647649.696477] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1647649.696501] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647649.696516] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647649.696531] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647649.696546] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647649.696561] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647649.696564] [] ? wake_up_state+0x20/0x20 [1647649.696579] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647649.696581] [] kthread+0xd1/0xe0 [1647649.696583] [] ? insert_kthread_work+0x40/0x40 [1647649.696586] [] ret_from_fork_nospec_begin+0x7/0x21 [1647649.696588] [] ? insert_kthread_work+0x40/0x40 [1647649.696589] Code: 00 00 0f 1f 44 00 00 41 ff e7 e8 07 00 00 00 f3 90 0f ae e8 eb f9 4c 89 3c 24 c3 90 90 90 90 90 90 90 55 48 89 e5 f0 ff 07 f3 90 <83> 3f 01 75 f9 f0 ff 0f 75 f1 5d c3 90 66 2e 0f 1f 84 00 00 00 [1647653.583186] NMI watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [ldlm_bl_07:7754] [1647653.584228] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1647653.584268] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1647653.584308] CPU: 1 PID: 7754 Comm: ldlm_bl_07 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1647653.584310] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1647653.584312] task: ffff9dad66f3a080 ti: ffff9dac0b858000 task.ti: ffff9dac0b858000 [1647653.584313] RIP: 0010:[] [] interval_last+0x2f/0x120 [ptlrpc] [1647653.584357] RSP: 0000:ffff9dac0b85bc20 EFLAGS: 00000282 [1647653.584358] RAX: 0000000000000001 RBX: ffff9dbeceb4a8c8 RCX: 0000000000000000 [1647653.584359] RDX: ffff9da3a6955400 RSI: 0000000171eb0000 RDI: ffff9db9646ab700 [1647653.584360] RBP: ffff9dac0b85bc28 R08: 0000000000000000 R09: 0000000000000001 [1647653.584361] R10: 0000000000000000 R11: fffff902ed86b200 R12: ffff9dbf32acb6c0 [1647653.584362] R13: ffff9dbf32acb6a8 R14: ffff9dbeceb4a8e0 R15: 00000000a8b553f1 [1647653.584363] FS: 0000000000000000(0000) GS:ffff9daf3e640000(0000) knlGS:0000000000000000 [1647653.584365] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1647653.584366] CR2: 000055e87d4500c0 CR3: 000000153ac10000 CR4: 00000000003607e0 [1647653.584367] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1647653.584368] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1647653.584369] Call Trace: [1647653.584388] [] interval_iterate_reverse+0x7d/0x270 [ptlrpc] [1647653.584403] [] ldlm_extent_shift_kms+0xa2/0x1b0 [ptlrpc] [1647653.584414] [] osc_ldlm_blocking_ast+0x306/0x3a0 [osc] [1647653.584427] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647653.584457] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647653.584472] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647653.584486] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647653.584500] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647653.584505] [] ? wake_up_state+0x20/0x20 [1647653.584519] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647653.584522] [] kthread+0xd1/0xe0 [1647653.584524] [] ? insert_kthread_work+0x40/0x40 [1647653.584528] [] ret_from_fork_nospec_begin+0x7/0x21 [1647653.584530] [] ? insert_kthread_work+0x40/0x40 [1647653.584531] Code: 00 8b 0d ad d6 c5 ff 55 48 89 e5 53 48 89 fb 83 e1 01 75 48 48 85 ff 75 0e 31 c0 90 5b 5d c3 0f 1f 44 00 00 48 89 d3 48 8b 53 08 <48> 85 d2 75 f4 85 c9 48 89 d8 74 e5 f6 05 7a d6 c5 ff 01 74 dc [1647657.592221] NMI watchdog: BUG: soft lockup - CPU#5 stuck for 22s! [ldlm_bl_06:7752] [1647657.593266] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1647657.593306] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1647657.593347] CPU: 5 PID: 7752 Comm: ldlm_bl_06 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1647657.593348] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1647657.593350] task: ffff9dad66f3e180 ti: ffff9daf1920c000 task.ti: ffff9daf1920c000 [1647657.593352] RIP: 0010:[] [] __write_lock_failed+0x9/0x20 [1647657.593359] RSP: 0000:ffff9daf1920fc80 EFLAGS: 00000297 [1647657.593360] RAX: ffff9dbf2ca04068 RBX: 00000000000000a0 RCX: ffff9dbf2ca04040 [1647657.593361] RDX: 0000000000000001 RSI: ffff9dba047400b0 RDI: ffff9dbf2ca0405c [1647657.593362] RBP: ffff9daf1920fc80 R08: ffff9dbaccf584f0 R09: 0000000000000001 [1647657.593363] R10: 0000000000000000 R11: fffff902ff53a480 R12: ffff9db75ea53e80 [1647657.593364] R13: ffffffffc12cc36f R14: ffff9daf1920fc38 R15: ffff9dae905b2b40 [1647657.593366] FS: 0000000000000000(0000) GS:ffff9daf3e740000(0000) knlGS:0000000000000000 [1647657.593367] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1647657.593368] CR2: 00007ff17d353f94 CR3: 0000000e443fe000 CR4: 00000000003607e0 [1647657.593370] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1647657.593371] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1647657.593372] Call Trace: [1647657.593379] [] _raw_write_lock+0x17/0x20 [1647657.593416] [] cl_object_attr_write_lock+0x1a/0x20 [obdclass] [1647657.593427] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1647657.593455] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647657.593472] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647657.593488] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647657.593504] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647657.593520] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647657.593524] [] ? wake_up_state+0x20/0x20 [1647657.593540] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647657.593543] [] kthread+0xd1/0xe0 [1647657.593545] [] ? insert_kthread_work+0x40/0x40 [1647657.593548] [] ret_from_fork_nospec_begin+0x7/0x21 [1647657.593550] [] ? insert_kthread_work+0x40/0x40 [1647657.593551] Code: 00 00 0f 1f 44 00 00 41 ff e7 e8 07 00 00 00 f3 90 0f ae e8 eb f9 4c 89 3c 24 c3 90 90 90 90 90 90 90 55 48 89 e5 f0 ff 07 f3 90 <83> 3f 01 75 f9 f0 ff 0f 75 f1 5d c3 90 66 2e 0f 1f 84 00 00 00 [1647657.599218] NMI watchdog: BUG: soft lockup - CPU#8 stuck for 22s! [ldlm_bl_02:7514] [1647657.600165] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1647657.600196] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1647657.600227] CPU: 8 PID: 7514 Comm: ldlm_bl_02 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1647657.600229] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1647657.600230] task: ffff9dba42f82080 ti: ffff9dbf36d84000 task.ti: ffff9dbf36d84000 [1647657.600231] RIP: 0010:[] [] __write_lock_failed+0x9/0x20 [1647657.600236] RSP: 0000:ffff9dbf36d87c80 EFLAGS: 00000297 [1647657.600237] RAX: ffff9dbf2ca04068 RBX: 0000000000000100 RCX: ffff9dbf2ca04040 [1647657.600238] RDX: 0000000000000001 RSI: ffff9dba04740110 RDI: ffff9dbf2ca0405c [1647657.600239] RBP: ffff9dbf36d87c80 R08: ffff9da8e7d8b270 R09: 0000000000000001 [1647657.600240] R10: 0000000000000000 R11: fffff9032348cc00 R12: ffff9db75ea56e40 [1647657.600241] R13: ffffffffc12cc36f R14: ffff9dbf36d87c38 R15: ffff9dae905f95a0 [1647657.600243] FS: 0000000000000000(0000) GS:ffff9daf3e800000(0000) knlGS:0000000000000000 [1647657.600244] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1647657.600245] CR2: 00007f57c0025008 CR3: 000000153ac10000 CR4: 00000000003607e0 [1647657.600246] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1647657.600247] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1647657.600248] Call Trace: [1647657.600252] [] _raw_write_lock+0x17/0x20 [1647657.600276] [] cl_object_attr_write_lock+0x1a/0x20 [obdclass] [1647657.600286] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1647657.600307] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647657.600321] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647657.600336] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647657.600350] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647657.600364] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647657.600367] [] ? wake_up_state+0x20/0x20 [1647657.600381] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647657.600384] [] kthread+0xd1/0xe0 [1647657.600385] [] ? insert_kthread_work+0x40/0x40 [1647657.600388] [] ret_from_fork_nospec_begin+0x7/0x21 [1647657.600390] [] ? insert_kthread_work+0x40/0x40 [1647657.600391] Code: 00 00 0f 1f 44 00 00 41 ff e7 e8 07 00 00 00 f3 90 0f ae e8 eb f9 4c 89 3c 24 c3 90 90 90 90 90 90 90 55 48 89 e5 f0 ff 07 f3 90 <83> 3f 01 75 f9 f0 ff 0f 75 f1 5d c3 90 66 2e 0f 1f 84 00 00 00 [1647657.682221] NMI watchdog: BUG: soft lockup - CPU#17 stuck for 22s! [ldlm_bl_04:7748] [1647657.683455] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1647657.683496] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1647657.683538] CPU: 17 PID: 7748 Comm: ldlm_bl_04 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1647657.683540] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1647657.683542] task: ffff9dae5622d140 ti: ffff9dbf38738000 task.ti: ffff9dbf38738000 [1647657.683543] RIP: 0010:[] [] __write_lock_failed+0x9/0x20 [1647657.683549] RSP: 0000:ffff9dbf3873bc80 EFLAGS: 00000297 [1647657.683550] RAX: ffff9dbf2ca04068 RBX: 0000000000000220 RCX: ffff9dbf2ca04040 [1647657.683552] RDX: 0000000000000001 RSI: ffff9dba04740230 RDI: ffff9dbf2ca0405c [1647657.683553] RBP: ffff9dbf3873bc80 R08: ffff9dbf39f58850 R09: 0000000000000001 [1647657.683554] R10: 0000000000000000 R11: fffff903217dab80 R12: ffff9db75ea55540 [1647657.683555] R13: ffffffffc12cc36f R14: ffff9dbf3873bc38 R15: ffff9dbb70b8ab40 [1647657.683557] FS: 0000000000000000(0000) GS:ffff9dbf3df40000(0000) knlGS:0000000000000000 [1647657.683558] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1647657.683559] CR2: 0000000000d32b00 CR3: 000000153ac10000 CR4: 00000000003607e0 [1647657.683561] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1647657.683562] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1647657.683563] Call Trace: [1647657.683568] [] _raw_write_lock+0x17/0x20 [1647657.683600] [] cl_object_attr_write_lock+0x1a/0x20 [obdclass] [1647657.683612] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1647657.683638] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647657.683653] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647657.683669] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647657.683685] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647657.683701] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647657.683704] [] ? wake_up_state+0x20/0x20 [1647657.683720] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647657.683722] [] kthread+0xd1/0xe0 [1647657.683724] [] ? insert_kthread_work+0x40/0x40 [1647657.683727] [] ret_from_fork_nospec_begin+0x7/0x21 [1647657.683729] [] ? insert_kthread_work+0x40/0x40 [1647657.683730] Code: 00 00 0f 1f 44 00 00 41 ff e7 e8 07 00 00 00 f3 90 0f ae e8 eb f9 4c 89 3c 24 c3 90 90 90 90 90 90 90 55 48 89 e5 f0 ff 07 f3 90 <83> 3f 01 75 f9 f0 ff 0f 75 f1 5d c3 90 66 2e 0f 1f 84 00 00 00 [1647677.695390] NMI watchdog: BUG: soft lockup - CPU#22 stuck for 22s! [ldlm_bl_23:8328] [1647677.696479] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1647677.696520] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1647677.696562] CPU: 22 PID: 8328 Comm: ldlm_bl_23 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1647677.696563] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1647677.696565] task: ffff9dad51255140 ti: ffff9d9f47420000 task.ti: ffff9d9f47420000 [1647677.696567] RIP: 0010:[] [] __write_lock_failed+0xc/0x20 [1647677.696573] RSP: 0000:ffff9d9f47423c80 EFLAGS: 00000297 [1647677.696574] RAX: ffff9dbf2ca04068 RBX: 00000000000002c0 RCX: ffff9dbf2ca04040 [1647677.696575] RDX: 0000000000000001 RSI: ffff9dba047402d0 RDI: ffff9dbf2ca0405c [1647677.696576] RBP: ffff9d9f47423c80 R08: ffff9dbf37aaca90 R09: 0000000000000001 [1647677.696577] R10: 0000000000000000 R11: fffff902fd58dd80 R12: ffff9db75ea535c0 [1647677.696578] R13: ffffffffc12cc36f R14: ffff9d9f47423c38 R15: ffff9dbf37d2b610 [1647677.696580] FS: 0000000000000000(0000) GS:ffff9dbf3e080000(0000) knlGS:0000000000000000 [1647677.696581] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1647677.696582] CR2: 00007fdbb00131a8 CR3: 000000153ac10000 CR4: 00000000003607e0 [1647677.696583] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1647677.696584] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1647677.696585] Call Trace: [1647677.696590] [] _raw_write_lock+0x17/0x20 [1647677.696622] [] cl_object_attr_write_lock+0x1a/0x20 [obdclass] [1647677.696634] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1647677.696660] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647677.696675] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647677.696690] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647677.696705] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647677.696721] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647677.696724] [] ? wake_up_state+0x20/0x20 [1647677.696738] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647677.696741] [] kthread+0xd1/0xe0 [1647677.696742] [] ? insert_kthread_work+0x40/0x40 [1647677.696745] [] ret_from_fork_nospec_begin+0x7/0x21 [1647677.696747] [] ? insert_kthread_work+0x40/0x40 [1647677.696748] Code: 1f 44 00 00 41 ff e7 e8 07 00 00 00 f3 90 0f ae e8 eb f9 4c 89 3c 24 c3 90 90 90 90 90 90 90 55 48 89 e5 f0 ff 07 f3 90 83 3f 01 <75> f9 f0 ff 0f 75 f1 5d c3 90 66 2e 0f 1f 84 00 00 00 00 00 55 [1647681.583423] NMI watchdog: BUG: soft lockup - CPU#1 stuck for 23s! [ldlm_bl_07:7754] [1647681.584461] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1647681.584502] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1647681.584542] CPU: 1 PID: 7754 Comm: ldlm_bl_07 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1647681.584544] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1647681.584546] task: ffff9dad66f3a080 ti: ffff9dac0b858000 task.ti: ffff9dac0b858000 [1647681.584547] RIP: 0010:[] [] __write_lock_failed+0xc/0x20 [1647681.584555] RSP: 0000:ffff9dac0b85bc80 EFLAGS: 00000297 [1647681.584556] RAX: ffff9dbf2ca04068 RBX: 0000000000000020 RCX: ffff9dbf2ca04040 [1647681.584557] RDX: 0000000000000001 RSI: ffff9dba04740030 RDI: ffff9dbf2ca0405c [1647681.584558] RBP: ffff9dac0b85bc80 R08: ffff9da8e7d8bdb0 R09: 0000000000000001 [1647681.584559] R10: 0000000000000000 R11: fffff9032c6ccf00 R12: ffff9db75ea51b80 [1647681.584560] R13: ffffffffc12cc36f R14: ffff9dac0b85bc38 R15: ffff9dbf32acb610 [1647681.584562] FS: 0000000000000000(0000) GS:ffff9daf3e640000(0000) knlGS:0000000000000000 [1647681.584563] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1647681.584564] CR2: 000055e87d4500c0 CR3: 000000153ac10000 CR4: 00000000003607e0 [1647681.584565] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1647681.584566] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1647681.584567] Call Trace: [1647681.584575] [] _raw_write_lock+0x17/0x20 [1647681.584615] [] cl_object_attr_write_lock+0x1a/0x20 [obdclass] [1647681.584626] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1647681.584654] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647681.584670] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647681.584686] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647681.584701] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647681.584716] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647681.584720] [] ? wake_up_state+0x20/0x20 [1647681.584734] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647681.584737] [] kthread+0xd1/0xe0 [1647681.584739] [] ? insert_kthread_work+0x40/0x40 [1647681.584742] [] ret_from_fork_nospec_begin+0x7/0x21 [1647681.584744] [] ? insert_kthread_work+0x40/0x40 [1647681.584745] Code: 1f 44 00 00 41 ff e7 e8 07 00 00 00 f3 90 0f ae e8 eb f9 4c 89 3c 24 c3 90 90 90 90 90 90 90 55 48 89 e5 f0 ff 07 f3 90 83 3f 01 <75> f9 f0 ff 0f 75 f1 5d c3 90 66 2e 0f 1f 84 00 00 00 00 00 55 [1647685.592458] NMI watchdog: BUG: soft lockup - CPU#5 stuck for 22s! [ldlm_bl_06:7752] [1647685.593490] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1647685.593529] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1647685.593569] CPU: 5 PID: 7752 Comm: ldlm_bl_06 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1647685.593571] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1647685.593573] task: ffff9dad66f3e180 ti: ffff9daf1920c000 task.ti: ffff9daf1920c000 [1647685.593574] RIP: 0010:[] [] __write_lock_failed+0xc/0x20 [1647685.593582] RSP: 0000:ffff9daf1920fc80 EFLAGS: 00000297 [1647685.593583] RAX: ffff9dbf2ca04068 RBX: 00000000000000a0 RCX: ffff9dbf2ca04040 [1647685.593584] RDX: 0000000000000001 RSI: ffff9dba047400b0 RDI: ffff9dbf2ca0405c [1647685.593585] RBP: ffff9daf1920fc80 R08: ffff9dbaccf584f0 R09: 0000000000000001 [1647685.593586] R10: 0000000000000000 R11: fffff902fee37080 R12: ffff9dbef5b09f40 [1647685.593587] R13: ffffffffc12cc36f R14: ffff9daf1920fc38 R15: ffff9dae905b2b40 [1647685.593589] FS: 0000000000000000(0000) GS:ffff9daf3e740000(0000) knlGS:0000000000000000 [1647685.593590] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1647685.593591] CR2: 00007ff17d353f94 CR3: 0000000e443fe000 CR4: 00000000003607e0 [1647685.593592] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1647685.593593] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1647685.593594] Call Trace: [1647685.593602] [] _raw_write_lock+0x17/0x20 [1647685.593639] [] cl_object_attr_write_lock+0x1a/0x20 [obdclass] [1647685.593649] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1647685.593679] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647685.593694] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647685.593709] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647685.593724] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647685.593739] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647685.593743] [] ? wake_up_state+0x20/0x20 [1647685.593757] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647685.593760] [] kthread+0xd1/0xe0 [1647685.593762] [] ? insert_kthread_work+0x40/0x40 [1647685.593765] [] ret_from_fork_nospec_begin+0x7/0x21 [1647685.593767] [] ? insert_kthread_work+0x40/0x40 [1647685.593768] Code: 1f 44 00 00 41 ff e7 e8 07 00 00 00 f3 90 0f ae e8 eb f9 4c 89 3c 24 c3 90 90 90 90 90 90 90 55 48 89 e5 f0 ff 07 f3 90 83 3f 01 <75> f9 f0 ff 0f 75 f1 5d c3 90 66 2e 0f 1f 84 00 00 00 00 00 55 [1647685.599455] NMI watchdog: BUG: soft lockup - CPU#8 stuck for 22s! [ldlm_bl_02:7514] [1647685.600350] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1647685.600377] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1647685.600404] CPU: 8 PID: 7514 Comm: ldlm_bl_02 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1647685.600405] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1647685.600407] task: ffff9dba42f82080 ti: ffff9dbf36d84000 task.ti: ffff9dbf36d84000 [1647685.600408] RIP: 0010:[] [] __write_lock_failed+0xc/0x20 [1647685.600412] RSP: 0000:ffff9dbf36d87c80 EFLAGS: 00000297 [1647685.600413] RAX: ffff9dbf2ca04068 RBX: 0000000000000100 RCX: ffff9dbf2ca04040 [1647685.600414] RDX: 0000000000000001 RSI: ffff9dba04740110 RDI: ffff9dbf2ca0405c [1647685.600415] RBP: ffff9dbf36d87c80 R08: ffff9da8e7d8b270 R09: 0000000000000001 [1647685.600416] R10: 0000000000000000 R11: fffff903391c9180 R12: ffff9db75ea50000 [1647685.600417] R13: ffffffffc12cc36f R14: ffff9dbf36d87c38 R15: ffff9dae905f95a0 [1647685.600418] FS: 0000000000000000(0000) GS:ffff9daf3e800000(0000) knlGS:0000000000000000 [1647685.600419] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1647685.600420] CR2: 00007f57c0025008 CR3: 000000153ac10000 CR4: 00000000003607e0 [1647685.600421] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1647685.600422] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1647685.600423] Call Trace: [1647685.600426] [] _raw_write_lock+0x17/0x20 [1647685.600444] [] cl_object_attr_write_lock+0x1a/0x20 [obdclass] [1647685.600452] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1647685.600467] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647685.600481] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647685.600495] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647685.600509] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647685.600523] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647685.600526] [] ? wake_up_state+0x20/0x20 [1647685.600540] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647685.600542] [] kthread+0xd1/0xe0 [1647685.600544] [] ? insert_kthread_work+0x40/0x40 [1647685.600546] [] ret_from_fork_nospec_begin+0x7/0x21 [1647685.600548] [] ? insert_kthread_work+0x40/0x40 [1647685.600549] Code: 1f 44 00 00 41 ff e7 e8 07 00 00 00 f3 90 0f ae e8 eb f9 4c 89 3c 24 c3 90 90 90 90 90 90 90 55 48 89 e5 f0 ff 07 f3 90 83 3f 01 <75> f9 f0 ff 0f 75 f1 5d c3 90 66 2e 0f 1f 84 00 00 00 00 00 55 [1647685.682457] NMI watchdog: BUG: soft lockup - CPU#17 stuck for 22s! [ldlm_bl_04:7748] [1647685.683453] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1647685.683493] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1647685.683535] CPU: 17 PID: 7748 Comm: ldlm_bl_04 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1647685.683536] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1647685.683538] task: ffff9dae5622d140 ti: ffff9dbf38738000 task.ti: ffff9dbf38738000 [1647685.683539] RIP: 0010:[] [] ldlm_kms_shift_cb+0xa8/0x220 [ptlrpc] [1647685.683574] RSP: 0000:ffff9dbf3873bc18 EFLAGS: 00000202 [1647685.683575] RAX: ffff9da1fa4acec0 RBX: ffff9dbb70b8abf0 RCX: 0000000000000000 [1647685.683577] RDX: 0000000079234000 RSI: 0000000171a51000 RDI: ffff9dba9c672580 [1647685.683578] RBP: ffff9dbf3873bc28 R08: 0000000000000000 R09: 0000000000000001 [1647685.683579] R10: 0000000000000000 R11: fffff9032f948e00 R12: ffff9dba59eae370 [1647685.683580] R13: 000000007cda9a43 R14: 00000000c0d40892 R15: ffff9dbf3873bba8 [1647685.683582] FS: 0000000000000000(0000) GS:ffff9dbf3df40000(0000) knlGS:0000000000000000 [1647685.683583] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1647685.683584] CR2: 0000000000d32b00 CR3: 000000153ac10000 CR4: 00000000003607e0 [1647685.683585] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1647685.683587] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1647685.683588] Call Trace: [1647685.683604] [] ? ldlm_extent_shift_kms+0x1b0/0x1b0 [ptlrpc] [1647685.683622] [] interval_iterate_reverse+0x53/0x270 [ptlrpc] [1647685.683637] [] ldlm_extent_shift_kms+0xa2/0x1b0 [ptlrpc] [1647685.683649] [] osc_ldlm_blocking_ast+0x306/0x3a0 [osc] [1647685.683663] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647685.683688] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647685.683703] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647685.683719] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647685.683734] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647685.683739] [] ? wake_up_state+0x20/0x20 [1647685.683755] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647685.683757] [] kthread+0xd1/0xe0 [1647685.683759] [] ? insert_kthread_work+0x40/0x40 [1647685.683763] [] ret_from_fork_nospec_begin+0x7/0x21 [1647685.683765] [] ? insert_kthread_work+0x40/0x40 [1647685.683766] Code: c0 0f 84 84 00 00 00 48 8b 90 d0 00 00 00 48 8b 33 48 39 f2 73 4d 48 83 c2 01 48 3b 53 08 76 04 48 89 53 08 83 b8 94 00 00 00 02 <0f> 84 12 01 00 00 85 c9 74 6e f6 05 b5 30 c7 ff 01 74 65 be 01 [1647705.695627] NMI watchdog: BUG: soft lockup - CPU#22 stuck for 22s! [ldlm_bl_23:8328] [1647705.695664] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1647705.695699] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1647705.695702] CPU: 22 PID: 8328 Comm: ldlm_bl_23 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1647705.695702] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1647705.695704] task: ffff9dad51255140 ti: ffff9d9f47420000 task.ti: ffff9d9f47420000 [1647705.695736] RIP: 0010:[] [] ldlm_kms_shift_cb+0x7b/0x220 [ptlrpc] [1647705.695737] RSP: 0000:ffff9d9f47423c18 EFLAGS: 00000246 [1647705.695738] RAX: ffff9da9d13686c0 RBX: ffff9dbf37d2b6c0 RCX: 0000000000000000 [1647705.695738] RDX: ffff9da9d13688c8 RSI: 0000040000000000 RDI: ffff9dad2b014700 [1647705.695739] RBP: ffff9d9f47423c28 R08: 0000000000000000 R09: 0000000000000001 [1647705.695739] R10: 0000000000000000 R11: fffff902ee877b80 R12: ffff9dba59eaf678 [1647705.695740] R13: 00000000313aee60 R14: 00000000c0d40892 R15: ffff9d9f47423ba8 [1647705.695741] FS: 0000000000000000(0000) GS:ffff9dbf3e080000(0000) knlGS:0000000000000000 [1647705.695741] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1647705.695742] CR2: 00007fdbb00131a8 CR3: 000000153ac10000 CR4: 00000000003607e0 [1647705.695743] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1647705.695743] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1647705.695744] Call Trace: [1647705.695760] [] ? ldlm_extent_shift_kms+0x1b0/0x1b0 [ptlrpc] [1647705.695777] [] interval_iterate_reverse+0x53/0x270 [ptlrpc] [1647705.695792] [] ldlm_extent_shift_kms+0xa2/0x1b0 [ptlrpc] [1647705.695804] [] osc_ldlm_blocking_ast+0x306/0x3a0 [osc] [1647705.695817] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1647705.695843] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1647705.695857] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1647705.695872] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1647705.695888] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1647705.695892] [] ? wake_up_state+0x20/0x20 [1647705.695907] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1647705.695909] [] kthread+0xd1/0xe0 [1647705.695911] [] ? insert_kthread_work+0x40/0x40 [1647705.695914] [] ret_from_fork_nospec_begin+0x7/0x21 [1647705.695916] [] ? insert_kthread_work+0x40/0x40 [1647705.695930] Code: 00 00 48 85 b2 f0 fe ff ff 74 20 48 8b 90 08 02 00 00 49 39 d4 48 8d 82 f8 fd ff ff 0f 84 96 00 00 00 48 85 b2 f0 fe ff ff 75 e0 <48> 85 c0 0f 84 84 00 00 00 48 8b 90 d0 00 00 00 48 8b 33 48 39 [1647879.889503] Lustre: Unmounted ai400-client [1647886.052071] LNet: Removed LNI 10.0.13.150@o2ib10 [1648630.238283] LNet: HW NUMA nodes: 2, HW CPU cores: 24, npartitions: 2 [1648630.239683] alg: No test for adler32 (adler32-zlib) [1648631.032630] Lustre: Lustre: Build Version: 2.12.58_146_g41bc5f0 [1648631.095526] LNet: 11373:0:(config.c:1641:lnet_inet_enumerate()) lnet: Ignoring interface dummy0: it's down [1648631.095546] LNet: Using FMR for registration [1648631.105168] LNet: Added LNI 10.0.13.150@o2ib10 [8/256/0/180] [1648632.298355] Lustre: Mounted ai400-client [1651621.668613] NMI watchdog: BUG: soft lockup - CPU#13 stuck for 23s! [ldlm_bl_18:12124] [1651621.669842] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1651621.669883] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1651621.669925] CPU: 13 PID: 12124 Comm: ldlm_bl_18 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1651621.669927] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1651621.669929] task: ffff9daf377e1040 ti: ffff9daf19fdc000 task.ti: ffff9daf19fdc000 [1651621.669931] RIP: 0010:[] [] ldlm_kms_shift_cb+0x7b/0x220 [ptlrpc] [1651621.669963] RSP: 0000:ffff9daf19fdfc18 EFLAGS: 00000246 [1651621.669964] RAX: ffff9da65beea640 RBX: ffff9db6cfee2bf0 RCX: 0000000000000000 [1651621.669965] RDX: ffff9da65beea848 RSI: 0000040000000000 RDI: ffff9dadfbb7d200 [1651621.669967] RBP: ffff9daf19fdfc28 R08: 0000000000000000 R09: 0000000000000001 [1651621.669968] R10: 0000000000000000 R11: fffff9033fd1d500 R12: ffff9dbf2baa2540 [1651621.669969] R13: 0000000087015fc8 R14: 00000000c1249892 R15: ffff9daf19fdfba8 [1651621.669971] FS: 0000000000000000(0000) GS:ffff9dbf3de40000(0000) knlGS:0000000000000000 [1651621.669972] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1651621.669973] CR2: 0000000000de2078 CR3: 000000153ac10000 CR4: 00000000003607e0 [1651621.669975] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1651621.669976] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1651621.669977] Call Trace: [1651621.669993] [] ? ldlm_extent_shift_kms+0x1b0/0x1b0 [ptlrpc] [1651621.670012] [] interval_iterate_reverse+0x53/0x270 [ptlrpc] [1651621.670026] [] ldlm_extent_shift_kms+0xa2/0x1b0 [ptlrpc] [1651621.670036] [] osc_ldlm_blocking_ast+0x306/0x3a0 [osc] [1651621.670050] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1651621.670073] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1651621.670088] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1651621.670103] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1651621.670119] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1651621.670124] [] ? wake_up_state+0x20/0x20 [1651621.670139] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1651621.670143] [] kthread+0xd1/0xe0 [1651621.670145] [] ? insert_kthread_work+0x40/0x40 [1651621.670149] [] ret_from_fork_nospec_begin+0x7/0x21 [1651621.670151] [] ? insert_kthread_work+0x40/0x40 [1651621.670152] Code: 00 00 48 85 b2 f0 fe ff ff 74 20 48 8b 90 08 02 00 00 49 39 d4 48 8d 82 f8 fd ff ff 0f 84 96 00 00 00 48 85 b2 f0 fe ff ff 75 e0 <48> 85 c0 0f 84 84 00 00 00 48 8b 90 d0 00 00 00 48 8b 33 48 39 [1651621.683613] NMI watchdog: BUG: soft lockup - CPU#19 stuck for 22s! [ldlm_bl_96:14922] [1651621.684525] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1651621.684554] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1651621.684586] CPU: 19 PID: 14922 Comm: ldlm_bl_96 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1651621.684587] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1651621.684589] task: ffff9d9f6e634100 ti: ffff9dbe0b24c000 task.ti: ffff9dbe0b24c000 [1651621.684590] RIP: 0010:[] [] ldlm_kms_shift_cb+0xa8/0x220 [ptlrpc] [1651621.684617] RSP: 0000:ffff9dbe0b24fc18 EFLAGS: 00000202 [1651621.684618] RAX: ffff9da81ffc72c0 RBX: ffff9dba56afc190 RCX: 0000000000000000 [1651621.684619] RDX: 00000000e6ca4000 RSI: 0000000171695000 RDI: ffff9da66726d780 [1651621.684620] RBP: ffff9dbe0b24fc28 R08: 0000000000000000 R09: 0000000000000001 [1651621.684621] R10: 0000000000000000 R11: fffff90340b28280 R12: ffff9db9f0e23a18 [1651621.684622] R13: 00000000bd425ae8 R14: 00000000c1249892 R15: ffff9dbe0b24fba8 [1651621.684624] FS: 0000000000000000(0000) GS:ffff9dbf3dfc0000(0000) knlGS:0000000000000000 [1651621.684625] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1651621.684626] CR2: 00007fb9da91bf94 CR3: 0000001e3ebea000 CR4: 00000000003607e0 [1651621.684627] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1651621.684628] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1651621.684629] Call Trace: [1651621.684644] [] ? ldlm_extent_shift_kms+0x1b0/0x1b0 [ptlrpc] [1651621.684660] [] interval_iterate_reverse+0x53/0x270 [ptlrpc] [1651621.684674] [] ldlm_extent_shift_kms+0xa2/0x1b0 [ptlrpc] [1651621.684683] [] osc_ldlm_blocking_ast+0x306/0x3a0 [osc] [1651621.684696] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1651621.684714] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1651621.684728] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1651621.684742] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1651621.684757] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1651621.684759] [] ? wake_up_state+0x20/0x20 [1651621.684773] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1651621.684775] [] kthread+0xd1/0xe0 [1651621.684777] [] ? insert_kthread_work+0x40/0x40 [1651621.684780] [] ret_from_fork_nospec_begin+0x7/0x21 [1651621.684782] [] ? insert_kthread_work+0x40/0x40 [1651621.684783] Code: c0 0f 84 84 00 00 00 48 8b 90 d0 00 00 00 48 8b 33 48 39 f2 73 4d 48 83 c2 01 48 3b 53 08 76 04 48 89 53 08 83 b8 94 00 00 00 02 <0f> 84 12 01 00 00 85 c9 74 6e f6 05 b5 90 c5 ff 01 74 65 be 01 [1659658.976995] Lustre: Unmounted ai400-client [1659665.320398] LNet: Removed LNI 10.0.13.150@o2ib10 [1665799.482480] LNet: HW NUMA nodes: 2, HW CPU cores: 24, npartitions: 2 [1665799.483922] alg: No test for adler32 (adler32-zlib) [1665800.274228] Lustre: Lustre: Build Version: 2.12.58 [1665800.340028] LNet: 14579:0:(config.c:1627:lnet_inet_enumerate()) lnet: Ignoring interface dummy0: it's down [1665800.340048] LNet: Using FMR for registration [1665800.349252] LNet: Added LNI 10.0.13.150@o2ib10 [8/256/0/180] [1665801.536374] Lustre: Mounted ai400-client [1666869.520758] NMI watchdog: BUG: soft lockup - CPU#2 stuck for 23s! [ldlm_bl_08:15329] [1666869.521755] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1666869.521797] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1666869.521837] CPU: 2 PID: 15329 Comm: ldlm_bl_08 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1666869.521839] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1666869.521841] task: ffff9dbf367ca080 ti: ffff9dba2ee0c000 task.ti: ffff9dba2ee0c000 [1666869.521842] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1666869.521852] RSP: 0018:ffff9dba2ee0fc70 EFLAGS: 00000246 [1666869.521853] RAX: 0000000000000000 RBX: ffff9dac4a064a00 RCX: 0000000000110000 [1666869.521854] RDX: ffff9dbf3df9b780 RSI: 0000000000910001 RDI: ffff9da97bfb19c8 [1666869.521855] RBP: ffff9dba2ee0fc70 R08: ffff9daf3e69b780 R09: 0000000000000000 [1666869.521857] R10: 0000000000000000 R11: fffff9030066db80 R12: ffff9dba2ee0fc38 [1666869.521858] R13: ffff9dae1f18d900 R14: ffff9daa2c30a368 R15: ffff9dae1f18d9e8 [1666869.521859] FS: 0000000000000000(0000) GS:ffff9daf3e680000(0000) knlGS:0000000000000000 [1666869.521860] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1666869.521861] CR2: 00007f8d12f32000 CR3: 0000000069e5a000 CR4: 00000000003607e0 [1666869.521863] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1666869.521864] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1666869.521865] Call Trace: [1666869.521872] [] queued_spin_lock_slowpath+0xb/0xf [1666869.521878] [] _raw_spin_lock+0x20/0x30 [1666869.521915] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1666869.521927] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1666869.521956] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1666869.521971] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1666869.521987] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1666869.522003] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1666869.522019] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1666869.522023] [] ? wake_up_state+0x20/0x20 [1666869.522039] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1666869.522042] [] kthread+0xd1/0xe0 [1666869.522044] [] ? insert_kthread_work+0x40/0x40 [1666869.522047] [] ret_from_fork_nospec_begin+0x7/0x21 [1666869.522049] [] ? insert_kthread_work+0x40/0x40 [1666869.522050] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1666869.539757] NMI watchdog: BUG: soft lockup - CPU#10 stuck for 23s! [ldlm_bl_65:15988] [1666869.540653] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1666869.540686] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1666869.540718] CPU: 10 PID: 15988 Comm: ldlm_bl_65 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1666869.540720] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1666869.540722] task: ffff9dbf39065140 ti: ffff9db7272d0000 task.ti: ffff9db7272d0000 [1666869.540723] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1666869.540730] RSP: 0018:ffff9db7272d3c70 EFLAGS: 00000246 [1666869.540731] RAX: 0000000000000000 RBX: ffff9daf093c7ac0 RCX: 0000000000510000 [1666869.540732] RDX: ffff9daf3e69b780 RSI: 0000000000110001 RDI: ffff9da97bfb19c8 [1666869.540733] RBP: ffff9db7272d3c70 R08: ffff9daf3e89b780 R09: 0000000000000000 [1666869.540735] R10: 0000000000000000 R11: fffff902c0f6c380 R12: ffff9db7272d3c38 [1666869.540735] R13: ffff9da9036f8000 R14: ffff9da1bfc67e68 R15: ffff9da9036f80e8 [1666869.540737] FS: 0000000000000000(0000) GS:ffff9daf3e880000(0000) knlGS:0000000000000000 [1666869.540738] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1666869.540739] CR2: 00000000010c2000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1666869.540740] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1666869.540741] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1666869.540742] Call Trace: [1666869.540747] [] queued_spin_lock_slowpath+0xb/0xf [1666869.540750] [] _raw_spin_lock+0x20/0x30 [1666869.540779] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1666869.540789] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1666869.540812] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1666869.540825] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1666869.540840] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1666869.540854] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1666869.540869] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1666869.540872] [] ? wake_up_state+0x20/0x20 [1666869.540886] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1666869.540888] [] kthread+0xd1/0xe0 [1666869.540890] [] ? insert_kthread_work+0x40/0x40 [1666869.540892] [] ret_from_fork_nospec_begin+0x7/0x21 [1666869.540894] [] ? insert_kthread_work+0x40/0x40 [1666869.540895] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1666869.610760] NMI watchdog: BUG: soft lockup - CPU#14 stuck for 22s! [ldlm_bl_34:15956] [1666869.611765] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1666869.611805] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1666869.611846] CPU: 14 PID: 15956 Comm: ldlm_bl_34 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1666869.611848] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1666869.611849] task: ffff9daf3fbac100 ti: ffff9db6f3e30000 task.ti: ffff9db6f3e30000 [1666869.611851] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1666869.611859] RSP: 0018:ffff9db6f3e33c70 EFLAGS: 00000246 [1666869.611860] RAX: 0000000000000000 RBX: ffff9dadd52d4500 RCX: 0000000000710000 [1666869.611861] RDX: ffff9daf3e81b780 RSI: 0000000000410001 RDI: ffff9da97bfb19c8 [1666869.611862] RBP: ffff9db6f3e33c70 R08: ffff9dbf3de9b780 R09: 0000000000000000 [1666869.611863] R10: 0000000000000000 R11: fffff90340d99180 R12: ffff9db6f3e33c38 [1666869.611864] R13: ffff9dbafc6942c0 R14: ffff9dbf2a757178 R15: ffff9dbafc6943a8 [1666869.611866] FS: 0000000000000000(0000) GS:ffff9dbf3de80000(0000) knlGS:0000000000000000 [1666869.611867] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1666869.611868] CR2: 00000000026bd000 CR3: 000000200bf78000 CR4: 00000000003607e0 [1666869.611869] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1666869.611870] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1666869.611871] Call Trace: [1666869.611877] [] queued_spin_lock_slowpath+0xb/0xf [1666869.611882] [] _raw_spin_lock+0x20/0x30 [1666869.611912] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1666869.611923] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1666869.611948] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1666869.611963] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1666869.611979] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1666869.611994] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1666869.612010] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1666869.612014] [] ? wake_up_state+0x20/0x20 [1666869.612030] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1666869.612032] [] kthread+0xd1/0xe0 [1666869.612035] [] ? insert_kthread_work+0x40/0x40 [1666869.612038] [] ret_from_fork_nospec_begin+0x7/0x21 [1666869.612040] [] ? insert_kthread_work+0x40/0x40 [1666869.612041] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1666869.625758] NMI watchdog: BUG: soft lockup - CPU#20 stuck for 23s! [ldlm_bl_24:15946] [1666869.626661] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1666869.626691] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1666869.626722] CPU: 20 PID: 15946 Comm: ldlm_bl_24 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1666869.626723] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1666869.626725] task: ffff9dafe90eb0c0 ti: ffff9d9f09f68000 task.ti: ffff9d9f09f68000 [1666869.626726] RIP: 0010:[] [] ldlm_kms_shift_cb+0x7b/0x220 [ptlrpc] [1666869.626748] RSP: 0018:ffff9d9f09f6bc18 EFLAGS: 00000246 [1666869.626750] RAX: ffff9db25bff6540 RBX: ffff9dbd432c43c0 RCX: 0000000000000000 [1666869.626751] RDX: ffff9db25bff6748 RSI: 0000040000000000 RDI: ffff9dbefe290c00 [1666869.626752] RBP: ffff9d9f09f6bc28 R08: 0000000000000000 R09: ffff9dbf3de5b780 [1666869.626753] R10: 0000000000000000 R11: fffff902f5db9600 R12: ffff9db7287f8588 [1666869.626754] R13: 00000000be3d7db7 R14: 00000000c0d40639 R15: ffff9dbd432c42c0 [1666869.626755] FS: 0000000000000000(0000) GS:ffff9dbf3e000000(0000) knlGS:0000000000000000 [1666869.626756] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1666869.626757] CR2: 0000000000ff9000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1666869.626758] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1666869.626759] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1666869.626760] Call Trace: [1666869.626775] [] ? ldlm_extent_shift_kms+0x1b0/0x1b0 [ptlrpc] [1666869.626792] [] interval_iterate_reverse+0x53/0x270 [ptlrpc] [1666869.626806] [] ldlm_extent_shift_kms+0xa2/0x1b0 [ptlrpc] [1666869.626808] [] ? remove_waiter+0x66/0x126 [1666869.626816] [] osc_ldlm_blocking_ast+0x306/0x3a0 [osc] [1666869.626829] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1666869.626845] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1666869.626859] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1666869.626872] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1666869.626887] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1666869.626889] [] ? wake_up_state+0x20/0x20 [1666869.626904] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1666869.626906] [] kthread+0xd1/0xe0 [1666869.626907] [] ? insert_kthread_work+0x40/0x40 [1666869.626910] [] ret_from_fork_nospec_begin+0x7/0x21 [1666869.626912] [] ? insert_kthread_work+0x40/0x40 [1666869.626913] Code: 00 00 48 85 b2 f0 fe ff ff 74 20 48 8b 90 08 02 00 00 49 39 d4 48 8d 82 f8 fd ff ff 0f 84 96 00 00 00 48 85 b2 f0 fe ff ff 75 e0 <48> 85 c0 0f 84 84 00 00 00 48 8b 90 d0 00 00 00 48 8b 33 48 39 [1666897.520899] NMI watchdog: BUG: soft lockup - CPU#2 stuck for 23s! [ldlm_bl_08:15329] [1666897.521892] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1666897.521933] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1666897.521974] CPU: 2 PID: 15329 Comm: ldlm_bl_08 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1666897.521976] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1666897.521978] task: ffff9dbf367ca080 ti: ffff9dba2ee0c000 task.ti: ffff9dba2ee0c000 [1666897.521979] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x122/0x200 [1666897.521989] RSP: 0018:ffff9dba2ee0fc70 EFLAGS: 00000246 [1666897.521990] RAX: 0000000000000000 RBX: ffff9da0d1208780 RCX: 0000000000110000 [1666897.521992] RDX: ffff9dbf3dfdb780 RSI: 0000000000990001 RDI: ffff9da97bfb19c8 [1666897.521993] RBP: ffff9dba2ee0fc70 R08: ffff9daf3e69b780 R09: 0000000000000000 [1666897.521994] R10: 0000000000000000 R11: fffff902e7667800 R12: ffff9dba2ee0fc38 [1666897.521995] R13: ffff9dae1f18d900 R14: ffff9daa2c30a368 R15: ffff9dae1f18d9e8 [1666897.521996] FS: 0000000000000000(0000) GS:ffff9daf3e680000(0000) knlGS:0000000000000000 [1666897.521997] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1666897.521998] CR2: 00007f8d12f32000 CR3: 0000000069e5a000 CR4: 00000000003607e0 [1666897.522000] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1666897.522001] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1666897.522002] Call Trace: [1666897.522009] [] queued_spin_lock_slowpath+0xb/0xf [1666897.522014] [] _raw_spin_lock+0x20/0x30 [1666897.522051] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1666897.522063] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1666897.522092] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1666897.522106] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1666897.522122] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1666897.522138] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1666897.522154] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1666897.522157] [] ? wake_up_state+0x20/0x20 [1666897.522173] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1666897.522177] [] kthread+0xd1/0xe0 [1666897.522179] [] ? insert_kthread_work+0x40/0x40 [1666897.522182] [] ret_from_fork_nospec_begin+0x7/0x21 [1666897.522184] [] ? insert_kthread_work+0x40/0x40 [1666897.522185] Code: 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 <41> 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b [1666905.805940] INFO: rcu_sched self-detected stall on CPU { 2} (t=60000 jiffies g=22175888 c=22175887 q=171746) [1666905.807118] Task dump for CPU 2: [1666905.807121] ldlm_bl_08 R running task 0 15329 2 0x00000088 [1666905.807124] Call Trace: [1666905.807126] [] sched_show_task+0xa8/0x110 [1666905.807136] [] dump_cpu_task+0x39/0x70 [1666905.807140] [] rcu_dump_cpu_stacks+0x90/0xd0 [1666905.807142] [] rcu_check_callbacks+0x442/0x730 [1666905.807147] [] ? tick_sched_do_timer+0x50/0x50 [1666905.807152] [] update_process_times+0x46/0x80 [1666905.807154] [] tick_sched_handle+0x30/0x70 [1666905.807156] [] tick_sched_timer+0x39/0x80 [1666905.807162] [] __hrtimer_run_queues+0xf3/0x270 [1666905.807164] [] hrtimer_interrupt+0xaf/0x1d0 [1666905.807171] [] local_apic_timer_interrupt+0x3b/0x60 [1666905.807176] [] smp_apic_timer_interrupt+0x43/0x60 [1666905.807180] [] apic_timer_interrupt+0x162/0x170 [1666905.807181] [] ? native_queued_spin_lock_slowpath+0x126/0x200 [1666905.807189] [] queued_spin_lock_slowpath+0xb/0xf [1666905.807193] [] _raw_spin_lock+0x20/0x30 [1666905.807230] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1666905.807242] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1666905.807273] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1666905.807288] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1666905.807304] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1666905.807320] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1666905.807336] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1666905.807338] [] ? wake_up_state+0x20/0x20 [1666905.807354] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1666905.807356] [] kthread+0xd1/0xe0 [1666905.807358] [] ? insert_kthread_work+0x40/0x40 [1666905.807361] [] ret_from_fork_nospec_begin+0x7/0x21 [1666905.807363] [] ? insert_kthread_work+0x40/0x40 [1666917.611000] NMI watchdog: BUG: soft lockup - CPU#14 stuck for 21s! [ldlm_bl_23:15945] [1666917.612046] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1666917.612086] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1666917.612126] CPU: 14 PID: 15945 Comm: ldlm_bl_23 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1666917.612128] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1666917.612129] task: ffff9dbf379c8000 ti: ffff9dbef8f40000 task.ti: ffff9dbef8f40000 [1666917.612131] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x120/0x200 [1666917.612139] RSP: 0018:ffff9dbef8f43c70 EFLAGS: 00000246 [1666917.612140] RAX: 0000000000000000 RBX: ffff9da6671166c0 RCX: 0000000000710000 [1666917.612141] RDX: ffff9dbf3de1b780 RSI: 0000000000610001 RDI: ffff9da97bfb19c8 [1666917.612142] RBP: ffff9dbef8f43c70 R08: ffff9dbf3de9b780 R09: 0000000000000000 [1666917.612143] R10: 0000000000000000 R11: fffff90320a1c600 R12: ffff9dbef8f43c38 [1666917.612144] R13: ffff9dbafc6942c0 R14: ffff9dbf2a757178 R15: ffff9dbafc6943a8 [1666917.612146] FS: 0000000000000000(0000) GS:ffff9dbf3de80000(0000) knlGS:0000000000000000 [1666917.612147] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1666917.612148] CR2: 00000000026bd000 CR3: 000000200bf78000 CR4: 00000000003607e0 [1666917.612149] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1666917.612150] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1666917.612151] Call Trace: [1666917.612156] [] queued_spin_lock_slowpath+0xb/0xf [1666917.612161] [] _raw_spin_lock+0x20/0x30 [1666917.612193] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1666917.612204] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1666917.612228] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1666917.612243] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1666917.612259] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1666917.612274] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1666917.612290] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1666917.612293] [] ? wake_up_state+0x20/0x20 [1666917.612309] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1666917.612312] [] kthread+0xd1/0xe0 [1666917.612314] [] ? insert_kthread_work+0x40/0x40 [1666917.612317] [] ret_from_fork_nospec_begin+0x7/0x21 [1666917.612319] [] ? insert_kthread_work+0x40/0x40 [1666917.612320] Code: c1 e8 13 48 c1 ea 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 90 41 8b 40 08 85 c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 [1666921.623020] NMI watchdog: BUG: soft lockup - CPU#19 stuck for 22s! [ldlm_bl_13:15334] [1666921.624044] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1666921.624084] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1666921.624125] CPU: 19 PID: 15334 Comm: ldlm_bl_13 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1666921.624127] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1666921.624128] task: ffff9dbf367cd140 ti: ffff9db6b3f00000 task.ti: ffff9db6b3f00000 [1666921.624130] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1666921.624138] RSP: 0018:ffff9db6b3f03c70 EFLAGS: 00000246 [1666921.624139] RAX: 0000000000000000 RBX: ffff9dadd52d4640 RCX: 0000000000990000 [1666921.624140] RDX: ffff9dbf3e0db780 RSI: 0000000000b90001 RDI: ffff9da97bfb19c8 [1666921.624141] RBP: ffff9db6b3f03c70 R08: ffff9dbf3dfdb780 R09: 0000000000000000 [1666921.624142] R10: 0000000000000000 R11: fffff902ea4d6200 R12: ffff9db6b3f03c38 [1666921.624143] R13: ffff9db761aa1640 R14: ffff9db61ebec2d8 R15: ffff9db761aa1728 [1666921.624144] FS: 0000000000000000(0000) GS:ffff9dbf3dfc0000(0000) knlGS:0000000000000000 [1666921.624146] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1666921.624147] CR2: 00007fe809afedb8 CR3: 000000153ac10000 CR4: 00000000003607e0 [1666921.624148] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1666921.624149] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1666921.624150] Call Trace: [1666921.624155] [] queued_spin_lock_slowpath+0xb/0xf [1666921.624160] [] _raw_spin_lock+0x20/0x30 [1666921.624190] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1666921.624200] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1666921.624223] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1666921.624237] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1666921.624252] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1666921.624267] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1666921.624281] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1666921.624284] [] ? wake_up_state+0x20/0x20 [1666921.624298] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1666921.624301] [] kthread+0xd1/0xe0 [1666921.624303] [] ? insert_kthread_work+0x40/0x40 [1666921.624306] [] ret_from_fork_nospec_begin+0x7/0x21 [1666921.624307] [] ? insert_kthread_work+0x40/0x40 [1666921.624308] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1666949.623168] NMI watchdog: BUG: soft lockup - CPU#19 stuck for 22s! [ldlm_bl_13:15334] [1666949.624170] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) ib_uverbs(OE) [1666949.624210] mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: libcfs] [1666949.624250] CPU: 19 PID: 15334 Comm: ldlm_bl_13 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1666949.624252] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1666949.624254] task: ffff9dbf367cd140 ti: ffff9db6b3f00000 task.ti: ffff9db6b3f00000 [1666949.624255] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1666949.624263] RSP: 0018:ffff9db6b3f03c70 EFLAGS: 00000246 [1666949.624264] RAX: 0000000000000000 RBX: ffff9dac4a065680 RCX: 0000000000990000 [1666949.624265] RDX: ffff9dbf3de5b780 RSI: 0000000000690001 RDI: ffff9da97bfb19c8 [1666949.624266] RBP: ffff9db6b3f03c70 R08: ffff9dbf3dfdb780 R09: 0000000000000000 [1666949.624267] R10: 0000000000000000 R11: fffff902ee1bca80 R12: ffff9db6b3f03c38 [1666949.624268] R13: ffff9db761aa1640 R14: ffff9db61ebec2d8 R15: ffff9db761aa1728 [1666949.624270] FS: 0000000000000000(0000) GS:ffff9dbf3dfc0000(0000) knlGS:0000000000000000 [1666949.624271] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1666949.624272] CR2: 00007fe809afedb8 CR3: 000000153ac10000 CR4: 00000000003607e0 [1666949.624273] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1666949.624274] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1666949.624275] Call Trace: [1666949.624281] [] queued_spin_lock_slowpath+0xb/0xf [1666949.624285] [] _raw_spin_lock+0x20/0x30 [1666949.624315] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1666949.624325] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1666949.624349] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1666949.624363] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1666949.624377] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1666949.624392] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1666949.624406] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1666949.624410] [] ? wake_up_state+0x20/0x20 [1666949.624424] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1666949.624426] [] kthread+0xd1/0xe0 [1666949.624428] [] ? insert_kthread_work+0x40/0x40 [1666949.624431] [] ret_from_fork_nospec_begin+0x7/0x21 [1666949.624433] [] ? insert_kthread_work+0x40/0x40 [1666949.624434] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1667013.931223] Lustre: Unmounted ai400-client [1667099.249572] LNet: Removed LNI 10.0.13.150@o2ib10 [1668611.420520] LNet: HW NUMA nodes: 2, HW CPU cores: 24, npartitions: 2 [1668611.421852] alg: No test for adler32 (adler32-zlib) [1668612.211637] Lustre: Lustre: Build Version: 2.12.58 [1668612.278428] LNet: 19632:0:(config.c:1627:lnet_inet_enumerate()) lnet: Ignoring interface dummy0: it's down [1668612.278446] LNet: Using FMR for registration [1668612.287425] LNet: Added LNI 10.0.13.150@o2ib10 [8/256/0/180] [1668612.322845] LustreError: 156-2: The client profile 'scratch0-client' could not be read from the MGS. Does that filesystem exist? [1668612.324724] Lustre: Unmounted scratch0-client [1668612.325472] LustreError: 19617:0:(obd_mount.c:1669:lustre_fill_super()) Unable to mount (-22) [1668654.288490] LNet: Removed LNI 10.0.13.150@o2ib10 [1668665.164527] LNet: HW NUMA nodes: 2, HW CPU cores: 24, npartitions: 2 [1668665.165984] alg: No test for adler32 (adler32-zlib) [1668665.955057] Lustre: Lustre: Build Version: 2.12.58 [1668666.020582] LNet: 20144:0:(config.c:1627:lnet_inet_enumerate()) lnet: Ignoring interface dummy0: it's down [1668666.020600] LNet: Using FMR for registration [1668666.029914] LNet: Added LNI 10.0.13.150@o2ib10 [8/256/0/180] [1668667.224729] Lustre: Mounted ai400-client [1761688.599112] nr_pdflush_threads exported in /proc is scheduled for removal [1761753.257232] Lustre: Unmounted ai400-client [1761759.514302] LNet: Removed LNI 10.0.13.150@o2ib10 [1761999.062620] bash (151891): drop_caches: 3 [1762091.694965] bash (177005): drop_caches: 3 [1810947.632712] LNet: HW NUMA nodes: 2, HW CPU cores: 24, npartitions: 2 [1810947.634539] alg: No test for adler32 (adler32-zlib) [1810948.446659] Lustre: Lustre: Build Version: 2.12.58_145_gfcf219d [1810948.542088] LNet: 73516:0:(config.c:1641:lnet_inet_enumerate()) lnet: Ignoring interface dummy0: it's down [1810948.542110] LNet: Using FMR for registration [1810948.553343] LNet: Added LNI 10.0.13.150@o2ib10 [8/256/0/180] [1810954.609422] LustreError: 73502:0:(mgc_request.c:250:do_config_log_add()) MGC10.0.10.175@o2ib10: failed processing log, type 1: rc = -5 [1810962.368375] LustreError: 73606:0:(mgc_request.c:598:do_requeue()) failed processing log: -5 [1810973.847004] LustreError: 11-0: ai400-MDT0000-mdc-ffff9dbf38fd7000: operation mds_connect to node 10.0.10.175@o2ib10 failed: rc = -16 [1810998.903577] LustreError: 11-0: ai400-MDT0000-mdc-ffff9dbf38fd7000: operation mds_connect to node 10.0.10.175@o2ib10 failed: rc = -16 [1810998.906296] LustreError: Skipped 2 previous similar messages [1811022.698358] LustreError: 11-0: ai400-MDT0002-mdc-ffff9dbf38fd7000: operation mds_connect to node 10.0.10.177@o2ib10 failed: rc = -16 [1811047.735524] LustreError: 11-0: ai400-MDT0002-mdc-ffff9dbf38fd7000: operation mds_connect to node 10.0.10.177@o2ib10 failed: rc = -16 [1811047.738213] LustreError: Skipped 4 previous similar messages [1811073.830668] Lustre: Mounted ai400-client [1811596.811282] NMI watchdog: BUG: soft lockup - CPU#3 stuck for 23s! [ldlm_bl_06:74558] [1811596.812575] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) tracedev(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) [1811596.812623] ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: mmfslinux] [1811596.812674] CPU: 3 PID: 74558 Comm: ldlm_bl_06 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1811596.812676] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1811596.812678] task: ffff9dbf39065140 ti: ffff9dbf26e94000 task.ti: ffff9dbf26e94000 [1811596.812680] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1811596.812692] RSP: 0018:ffff9dbf26e97c70 EFLAGS: 00000246 [1811596.812694] RAX: 0000000000000000 RBX: ffff9da2dc7ff840 RCX: 0000000000190000 [1811596.812695] RDX: ffff9dbf3e05b780 RSI: 0000000000a90001 RDI: ffff9da67247c058 [1811596.812696] RBP: ffff9dbf26e97c70 R08: ffff9daf3e6db780 R09: 0000000000000000 [1811596.812697] R10: 0000000000000000 R11: 0000000000000000 R12: ffff9dbf26e97c38 [1811596.812699] R13: ffff9da647e60000 R14: ffff9dac3df750e8 R15: ffff9da647e60098 [1811596.812701] FS: 0000000000000000(0000) GS:ffff9daf3e6c0000(0000) knlGS:0000000000000000 [1811596.812702] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1811596.812703] CR2: 0000000000b37000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1811596.812705] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1811596.812706] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1811596.812707] Call Trace: [1811596.812715] [] queued_spin_lock_slowpath+0xb/0xf [1811596.812722] [] _raw_spin_lock+0x20/0x30 [1811596.812759] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1811596.812772] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1811596.812798] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1811596.812816] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1811596.812834] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1811596.812853] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1811596.812871] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1811596.812876] [] ? wake_up_state+0x20/0x20 [1811596.812893] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1811596.812896] [] kthread+0xd1/0xe0 [1811596.812899] [] ? insert_kthread_work+0x40/0x40 [1811596.812903] [] ret_from_fork_nospec_begin+0x7/0x21 [1811596.812905] [] ? insert_kthread_work+0x40/0x40 [1811596.812906] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1811596.812933] Kernel panic - not syncing: softlockup: hung tasks [1811596.814059] CPU: 3 PID: 74558 Comm: ldlm_bl_06 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1811596.816349] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1811596.817548] Call Trace: [1811596.818739] [] dump_stack+0x19/0x1b [1811596.819963] [] panic+0xe8/0x21f [1811596.821163] [] ? show_regs+0x58/0x210 [1811596.822281] NMI watchdog: BUG: soft lockup - CPU#8 stuck for 23s! [ldlm_bl_08:74560] [1811596.822305] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) tracedev(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) [1811596.822326] ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: mmfslinux] [1811596.822329] CPU: 8 PID: 74560 Comm: ldlm_bl_08 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1811596.822330] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1811596.822331] task: ffff9dbf390630c0 ti: ffff9dbb77e7c000 task.ti: ffff9dbb77e7c000 [1811596.822336] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1811596.822337] RSP: 0018:ffff9dbb77e7fc70 EFLAGS: 00000246 [1811596.822338] RAX: 0000000000000000 RBX: ffff9da2dc7ef0c0 RCX: 0000000000410000 [1811596.822339] RDX: ffff9dbf3de5b780 RSI: 0000000000690000 RDI: ffff9da67247c058 [1811596.822340] RBP: ffff9dbb77e7fc70 R08: ffff9daf3e81b780 R09: 0000000000000000 [1811596.822340] R10: 0000000000000000 R11: fffff90300e69a00 R12: ffff9dbb77e7fc38 [1811596.822341] R13: ffff9da9ab1ed680 R14: ffff9da9ea22f958 R15: ffff9da9ab1ed718 [1811596.822342] FS: 0000000000000000(0000) GS:ffff9daf3e800000(0000) knlGS:0000000000000000 [1811596.822343] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1811596.822344] CR2: 0000000002451000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1811596.822345] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1811596.822346] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1811596.822346] Call Trace: [1811596.822349] [] queued_spin_lock_slowpath+0xb/0xf [1811596.822352] [] _raw_spin_lock+0x20/0x30 [1811596.822378] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1811596.822387] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1811596.822408] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1811596.822424] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1811596.822441] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1811596.822459] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1811596.822476] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1811596.822479] [] ? wake_up_state+0x20/0x20 [1811596.822496] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1811596.822497] [] kthread+0xd1/0xe0 [1811596.822499] [] ? insert_kthread_work+0x40/0x40 [1811596.822502] [] ret_from_fork_nospec_begin+0x7/0x21 [1811596.822503] [] ? insert_kthread_work+0x40/0x40 [1811596.822520] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1811596.824281] NMI watchdog: BUG: soft lockup - CPU#9 stuck for 23s! [ldlm_bl_09:74561] [1811596.824304] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) tracedev(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) [1811596.824326] ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: mmfslinux] [1811596.824328] CPU: 9 PID: 74561 Comm: ldlm_bl_09 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1811596.824329] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1811596.824331] task: ffff9dbf39061040 ti: ffff9db4ae7f4000 task.ti: ffff9db4ae7f4000 [1811596.824336] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1811596.824337] RSP: 0018:ffff9db4ae7f7c70 EFLAGS: 00000246 [1811596.824338] RAX: 0000000000000000 RBX: ffff9da2dc7fcc80 RCX: 0000000000490000 [1811596.824339] RDX: ffff9daf3e7db780 RSI: 0000000000390000 RDI: ffff9da67247c058 [1811596.824339] RBP: ffff9db4ae7f7c70 R08: ffff9daf3e85b780 R09: 0000000000000000 [1811596.824340] R10: 0000000000000000 R11: 0000000000000000 R12: ffff9db4ae7f7c38 [1811596.824341] R13: ffff9db6db39cbb0 R14: ffff9da87b75e6c8 R15: ffff9db6db39cc48 [1811596.824342] FS: 0000000000000000(0000) GS:ffff9daf3e840000(0000) knlGS:0000000000000000 [1811596.824343] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1811596.824343] CR2: 0000000001307000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1811596.824344] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1811596.824345] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1811596.824345] Call Trace: [1811596.824348] [] queued_spin_lock_slowpath+0xb/0xf [1811596.824351] [] _raw_spin_lock+0x20/0x30 [1811596.824371] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1811596.824379] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1811596.824395] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1811596.824412] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1811596.824430] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1811596.824448] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1811596.824467] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1811596.824469] [] ? wake_up_state+0x20/0x20 [1811596.824488] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1811596.824490] [] kthread+0xd1/0xe0 [1811596.824492] [] ? insert_kthread_work+0x40/0x40 [1811596.824494] [] ret_from_fork_nospec_begin+0x7/0x21 [1811596.824496] [] ? insert_kthread_work+0x40/0x40 [1811596.824513] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1811596.895281] NMI watchdog: BUG: soft lockup - CPU#13 stuck for 23s! [ldlm_bl_19:74584] [1811596.895323] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) tracedev(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) [1811596.895365] ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: mmfslinux] [1811596.895368] CPU: 13 PID: 74584 Comm: ldlm_bl_19 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1811596.895369] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1811596.895371] task: ffff9dbd8ff8a080 ti: ffff9db15d8b4000 task.ti: ffff9db15d8b4000 [1811596.895381] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1811596.895382] RSP: 0018:ffff9db15d8b7c70 EFLAGS: 00000246 [1811596.895383] RAX: 0000000000000000 RBX: ffff9da2dc7ecf00 RCX: 0000000000690000 [1811596.895383] RDX: ffff9daf3e85b780 RSI: 0000000000490000 RDI: ffff9da67247c058 [1811596.895384] RBP: ffff9db15d8b7c70 R08: ffff9dbf3de5b780 R09: 0000000000000000 [1811596.895385] R10: 0000000000000000 R11: fffff903358baf00 R12: ffff9db15d8b7c38 [1811596.895386] R13: ffff9dbf2b682070 R14: ffff9db843658c68 R15: ffff9dbf2b682108 [1811596.895387] FS: 0000000000000000(0000) GS:ffff9dbf3de40000(0000) knlGS:0000000000000000 [1811596.895388] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1811596.895388] CR2: 0000000002090000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1811596.895389] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1811596.895390] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1811596.895391] Call Trace: [1811596.895397] [] queued_spin_lock_slowpath+0xb/0xf [1811596.895403] [] _raw_spin_lock+0x20/0x30 [1811596.895438] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1811596.895451] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1811596.895481] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1811596.895499] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1811596.895518] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1811596.895537] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1811596.895556] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1811596.895560] [] ? wake_up_state+0x20/0x20 [1811596.895578] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1811596.895581] [] kthread+0xd1/0xe0 [1811596.895583] [] ? insert_kthread_work+0x40/0x40 [1811596.895586] [] ret_from_fork_nospec_begin+0x7/0x21 [1811596.895588] [] ? insert_kthread_work+0x40/0x40 [1811596.895605] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1811596.900279] NMI watchdog: BUG: soft lockup - CPU#15 stuck for 23s! [ldlm_bl_14:74574] [1811596.900303] Modules linked in: mgc(OE) lustre(OE) lmv(OE) mdc(OE) fid(OE) osc(OE) lov(OE) fld(OE) ko2iblnd(OE) ptlrpc(OE) obdclass(OE) lnet(OE) libcfs(OE) tracedev(OE) xt_statistic dummy vport_vxlan openvswitch nf_conntrack_ipv6 nf_nat_ipv6 nf_defrag_ipv6 xt_NFLOG xt_physdev nfnetlink_log ip_set_hash_ip xt_set ip_set ipt_REJECT nf_reject_ipv4 xt_comment xt_nat veth vxlan ip6_udp_tunnel udp_tunnel iptable_mangle xt_mark ipt_MASQUERADE nf_nat_masquerade_ipv4 nf_conntrack_netlink nfnetlink iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter xt_conntrack nf_nat nf_conntrack libcrc32c br_netfilter bridge stp llc nfsv3 nfs_acl nfs lockd grace fscache overlay(T) rdma_ucm(OE) ib_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_fpga_tools(OE) mlx5_ib(OE) [1811596.900324] ib_uverbs(OE) mlx5_core(OE) mlxfw(OE) mlx4_en(OE) sunrpc iTCO_wdt iTCO_vendor_support mxm_wmi sb_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd pcspkr joydev sg i2c_i801 mei_me lpc_ich ioatdma mei ipmi_si ipmi_devintf ipmi_msghandler wmi acpi_pad acpi_power_meter binfmt_misc knem(OE) ip_tables ext4 mbcache jbd2 mlx4_ib(OE) ib_core(OE) sd_mod crc_t10dif crct10dif_generic mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm ahci drm libahci igb crct10dif_pclmul crct10dif_common crc32c_intel libata mlx4_core(OE) ptp pps_core devlink dca i2c_algo_bit drm_panel_orientation_quirks mlx_compat(OE) [last unloaded: mmfslinux] [1811596.900327] CPU: 15 PID: 74574 Comm: ldlm_bl_14 Kdump: loaded Tainted: G OEL ------------ T 3.10.0-957.27.2.el7.x86_64 #1 [1811596.900327] Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0027.071020182329 07/10/2018 [1811596.900329] task: ffff9dbf043e6180 ti: ffff9db278e28000 task.ti: ffff9db278e28000 [1811596.900334] RIP: 0010:[] [] native_queued_spin_lock_slowpath+0x126/0x200 [1811596.900335] RSP: 0018:ffff9db278e2bc70 EFLAGS: 00000246 [1811596.900336] RAX: 0000000000000000 RBX: ffff9da2dc7f9cc0 RCX: 0000000000790000 [1811596.900337] RDX: ffff9daf3e6db780 RSI: 0000000000190001 RDI: ffff9da67247c058 [1811596.900337] RBP: ffff9db278e2bc70 R08: ffff9dbf3dedb780 R09: 0000000000000000 [1811596.900338] R10: 0000000000000000 R11: fffff90340c3cf80 R12: ffff9db278e2bc38 [1811596.900339] R13: ffff9db5c2ef3610 R14: ffff9dba1c2b4908 R15: ffff9db5c2ef36a8 [1811596.900340] FS: 0000000000000000(0000) GS:ffff9dbf3dec0000(0000) knlGS:0000000000000000 [1811596.900340] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1811596.900341] CR2: 00000000019fe000 CR3: 000000153ac10000 CR4: 00000000003607e0 [1811596.900342] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1811596.900343] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1811596.900343] Call Trace: [1811596.900346] [] queued_spin_lock_slowpath+0xb/0xf [1811596.900349] [] _raw_spin_lock+0x20/0x30 [1811596.900368] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1811596.900376] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1811596.900392] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1811596.900409] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1811596.900426] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1811596.900444] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1811596.900461] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1811596.900463] [] ? wake_up_state+0x20/0x20 [1811596.900480] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1811596.900482] [] kthread+0xd1/0xe0 [1811596.900484] [] ? insert_kthread_work+0x40/0x40 [1811596.900487] [] ret_from_fork_nospec_begin+0x7/0x21 [1811596.900489] [] ? insert_kthread_work+0x40/0x40 [1811596.900506] Code: 0d 48 98 83 e2 30 48 81 c2 80 b7 01 00 48 03 14 c5 a0 bf 34 86 4c 89 02 41 8b 40 08 85 c0 75 0f 0f 1f 44 00 00 f3 90 41 8b 40 08 <85> c0 74 f6 4d 8b 08 4d 85 c9 74 04 41 0f 18 09 8b 17 0f b7 c2 [1811597.091231] [] watchdog_timer_fn+0x231/0x240 [1811597.092577] [] ? watchdog+0x40/0x40 [1811597.093891] [] __hrtimer_run_queues+0xf3/0x270 [1811597.095180] [] hrtimer_interrupt+0xaf/0x1d0 [1811597.096440] [] local_apic_timer_interrupt+0x3b/0x60 [1811597.097674] [] smp_apic_timer_interrupt+0x43/0x60 [1811597.098880] [] apic_timer_interrupt+0x162/0x170 [1811597.100057] [] ? native_queued_spin_lock_slowpath+0x126/0x200 [1811597.101232] [] queued_spin_lock_slowpath+0xb/0xf [1811597.102380] [] _raw_spin_lock+0x20/0x30 [1811597.103517] [] cl_object_attr_lock+0x1a/0x20 [obdclass] [1811597.104621] [] osc_ldlm_blocking_ast+0x2f6/0x3a0 [osc] [1811597.105709] [] ldlm_cancel_callback+0x8a/0x330 [ptlrpc] [1811597.106771] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [1811597.107816] [] ldlm_cli_cancel_local+0xa0/0x3f0 [ptlrpc] [1811597.108838] [] ldlm_cli_cancel_list_local+0xea/0x260 [ptlrpc] [1811597.109835] [] ldlm_bl_thread_main+0x5a0/0xa40 [ptlrpc] [1811597.110792] [] ? wake_up_state+0x20/0x20 [1811597.111746] [] ? ldlm_handle_bl_callback+0x4e0/0x4e0 [ptlrpc] [1811597.112678] [] kthread+0xd1/0xe0 [1811597.113599] [] ? insert_kthread_work+0x40/0x40 [1811597.114519] [] ret_from_fork_nospec_begin+0x7/0x21 [1811597.115438] [] ? insert_kthread_work+0x40/0x40