diff -up old/e2fsprogs-1.41.12.2.ora1/e2fsck/e2fsck.h ./e2fsprogs-1.41.12.2.ora1/e2fsck/e2fsck.h --- old/e2fsprogs-1.41.12.2.ora1/e2fsck/e2fsck.h 2010-10-08 03:48:33.000000000 -0700 +++ ./e2fsprogs-1.41.12.2.ora1/e2fsck/e2fsck.h 2011-01-09 21:47:00.450433703 -0800 @@ -337,8 +337,10 @@ struct e2fsck_struct { char *journal_name; /* lustre support */ +#define LMV_MAX_MDTS 8 int lustre_devtype; - char *lustre_mdsdb; + int lustre_mds_files; + char *lustre_mdsdb[LMV_MAX_MDTS]; char *lustre_ostdb; struct lfsck_outdb_info *lfsck_oinfo; diff -up old/e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck.c ./e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck.c --- old/e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck.c 2010-10-08 03:48:33.000000000 -0700 +++ ./e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck.c 2011-01-16 10:12:44.660964172 -0800 @@ -83,10 +83,19 @@ struct lfsck_fids { __u64 *fids; }; +struct lfsck_mdt_check_info { + struct lfsck_mds_hdr *mdt_hdr; + DB *mdt_fldb; + DB *mdt_dfiddb; + DB *mdt_oidb; + DB *mdt_hdrdb; + DB *mdt_direntdb; + DB *mdt_sizeinfodb; +}; + struct lfsck_thread_info { struct lfsck_mds_hdr *mds_hdr; - DB *mds_direntdb; - DB *mds_sizeinfodb; + struct lfsck_mdt_check_info *mds_info; __u32 start_ost_idx; __u32 end_ost_idx; int status; @@ -132,13 +141,15 @@ char mnt_path[PATH_MAX]; char *mds_file; char lostandfounddir[PATH_MAX]; char dupedir[PATH_MAX]; -char *ost_files[LOV_MAX_OSTS]; +char *ost_files[LOV_MAX_OSTS] = { NULL }; +char *mdt_files[LMV_MAX_MDTS] = { NULL }; int num_ost_files; +int num_mdt_files; struct obd_uuid lfsck_uuid[LOV_MAX_OSTS]; int lov_tgt_count = LOV_MAX_OSTS; -struct lfsck_saved_duplicates *lfsck_duplicates; +struct lfsck_saved_duplicates *lfsck_duplicates = NULL; int lfsck_dup_saved; int num_renamed; int fixed; @@ -316,24 +327,35 @@ int parse_args(int argc, char *argv[]) lfsck_save++; break; case 'm': - VERBOSE(1, "MDSDB: %s\n", optarg); - dbpath = malloc(PATH_MAX); - if (dbpath == NULL) { - fprintf(stderr, "error allocating dbpath\n"); - return -ENOMEM; - } - strcpy(tmp, 
optarg); - if (realpath(my_dirname(tmp), dbpath) == NULL) { - fprintf(stderr, "Failure to resolve path %s\n", - optarg); - free(dbpath); - exit(1); - } - - strcpy(tmp, optarg); - sprintf(dbpath+strlen(dbpath), "/%s", my_basename(tmp)); - mds_file = dbpath; + { + char *mdt_path; + VERBOSE(1, "MDSDB[%u]: %s\n", num_mdt_files, optarg); + p1 = optarg; + do { + dbpath = malloc(PATH_MAX); + if (dbpath == NULL) { + fprintf(stderr, "error allocating dbpath\n"); + return -ENOMEM; + } + /* Old-style arguments are comma separated; resolve each entry by itself */ + mdt_path = strsep(&p1, ","); + strcpy(tmp, mdt_path); + if (realpath(my_dirname(tmp), dbpath) == NULL) { + fprintf(stderr, "Failure to resolve path %s\n", + mdt_path); + for (c = 0; c < num_mdt_files; c++) + free(mdt_files[c]); + free(dbpath); + exit(1); + } + strcpy(tmp, mdt_path); + sprintf(dbpath+strlen(dbpath), "/%s", + my_basename(tmp)); + mdt_files[num_mdt_files] = dbpath; + num_mdt_files++; + } while (p1 != NULL); break; + } case 'n': lfsck_create = 0; lfsck_delete = 0; @@ -378,7 +400,6 @@ int parse_args(int argc, char *argv[]) ost_files[num_ost_files] = dbpath; num_ost_files++; } while (p1 != NULL); - break; } case 't': @@ -442,7 +463,7 @@ int parse_args(int argc, char *argv[]) } } - if (mds_file == NULL || ost_files[0] == NULL) { + if (mdt_files[0] == NULL || ost_files[0] == NULL) { fprintf(stderr, "--mdsdb or --ostdb unspecified\n"); return(-EINVAL); } @@ -761,7 +782,7 @@ int lfsck_recreate_obj(__u64 mds_fid, __ */ int lfsck_calc_size(struct lfsck_mds_objent *mds_obj, struct lfsck_ost_objent *ost_obj, - DB *mds_sizeinfodb) + DB *mdt_sizeinfodb) { struct lfsck_mds_szinfo mds_szinfo1; __u64 calc_size; @@ -780,7 +801,7 @@ int lfsck_calc_size(struct lfsck_mds_obj data.data = &mds_szinfo1; data.size = data.ulen = sizeof(mds_szinfo1); data.flags = DB_DBT_USERMEM; - if ((rc = mds_sizeinfodb->get(mds_sizeinfodb, NULL, &key, &data, 0))) { + if ((rc = mdt_sizeinfodb->get(mdt_sizeinfodb, NULL, &key, &data, 0))) { log_write("Failure to get sizeinfo 
"LPU64"\n",mds_obj->mds_fid); pthread_mutex_unlock(&size_lock); return (-ENOENT); @@ -818,7 +839,7 @@ int lfsck_calc_size(struct lfsck_mds_obj data.size = sizeof(mds_szinfo1); cputole_mds_szinfo(&mds_szinfo1); /* Make sure we overwrite */ - if ((rc = mds_sizeinfodb->put(mds_sizeinfodb, + if ((rc = mdt_sizeinfodb->put(mdt_sizeinfodb, NULL, &key, &data, 0)) != 0 ) { log_write("Failure to update sizeinfo data\n"); pthread_mutex_unlock(&size_lock); @@ -836,7 +857,7 @@ int lfsck_calc_size(struct lfsck_mds_obj */ int lfsck_run_pass2(__u32 ost_idx, struct lfsck_mds_hdr *mds_hdr, DB *mds_ostdb, DB *ostdb, - DB *mds_direntdb, DB *mds_sizeinfodb) + DB *mds_direntdb, DB *mdt_sizeinfodb) { struct lfsck_mds_objent mds_obj1; struct lfsck_ost_objent ost_obj1; @@ -914,7 +935,7 @@ int lfsck_run_pass2(__u32 ost_idx, struc } } #ifdef CHECK_SIZE - if (lfsck_calc_size(&mds_obj1, &ost_obj1, mds_sizeinfodb)) { + if (lfsck_calc_size(&mds_obj1, &ost_obj1, mdt_sizeinfodb)) { log_write("[%u]: error updating file size for object " LPU64": %s\n", ost_idx,objid,db_strerror(rc)); rc = -EINVAL; @@ -983,7 +1004,7 @@ int lfsck_fix_orphan(__u32 ost_idx, __u6 lum->lmm_stripe_offset = 0; lum->lmm_stripe_count = 1; lum->lmm_objects[0].l_object_id = ost_objid; - lum->lmm_objects[0].l_object_gr = ost_group; + lum->lmm_objects[0].l_object_seq = ost_group; lum->lmm_objects[0].l_ost_gen = 0; lum->lmm_objects[0].l_ost_idx = ost_idx; @@ -1163,7 +1184,7 @@ int lfsck_list_affected_files(char *mds_ } sprintf(dbname, "%s.%d", MDS_OSTDB, ost_idx); - if ((rc = lfsck_opendb(mds_file, dbname, &mds_db, 1, 0, 0)) != 0) { + if ((rc = lfsck_opendb(mds_file, dbname, &mds_db, 1, 0, 0, DB_HASH)) != 0) { log_write("failed to open mds db file %s\n", mds_file); rc = -EINVAL; goto out; @@ -1209,135 +1230,198 @@ out: return(rc); } +static int lfsck_get_mdt_hdr(DB *mdsdb, struct lfsck_mds_hdr *mdt_hdr) +{ + DBT key, data; + int rc; + + assert(mdt_hdr != NULL); + + mdt_hdr->mds_magic = MDS_MAGIC; + + memset(&key, 0, sizeof(key)); 
+ memset(&data, 0, sizeof(data)); + key.data = &mdt_hdr->mds_magic; + key.size = sizeof(mdt_hdr->mds_magic); + data.data = mdt_hdr; + data.size = sizeof(*mdt_hdr); + data.ulen = sizeof(*mdt_hdr); + data.flags = DB_DBT_USERMEM; + + rc = mdsdb->get(mdsdb, NULL, &key, &data, 0); + if (rc != 0) { + log_write("%s: error getting mdt_hdr info: %s\n", + progname, db_strerror(rc)); + return rc; + } + letocpu_mds_hdr(mdt_hdr); + return rc; +} + /* * For each ost index run checks 1 2 and 3. * 1) Check for object referenced by more than one file * 2) Check that objects exist on ost * 3) Check that containg mds entry exists for an object */ -int run_test(__u32 ost_idx, struct lfsck_mds_hdr *mds_hdr, - DB *mds_direntdb, DB *mds_sizeinfodb ) +int run_test(__u32 ost_idx, struct lfsck_mdt_check_info *mdt_info) { + struct lfsck_mds_hdr *mdt_hdr = NULL; struct lfsck_ost_hdr *ost_hdr = NULL; - char dbname[256]; + char dbname[256], ost_dbname[256]; DB *mds_ostdb = NULL; DB *ost_db = NULL; + DB *mdt_hdrdb = NULL; DBT key, data; __u64 last_id; - int i, rc; + int i, j, rc; + + mdt_hdr = malloc(sizeof(*mdt_hdr)); + if (mdt_hdr == NULL) { + log_write("Failure to alloc memory \n"); + rc = -ENOMEM; + goto out; + } + + ost_hdr = malloc(sizeof(*ost_hdr)); + if (ost_hdr == NULL) { + log_write("Failure to alloc memory\n"); + rc = -ENOMEM; + goto out; + } sprintf(dbname, "%s.%d", MDS_OSTDB, ost_idx); VERBOSE(2, "testing ost_idx %d\n", ost_idx); - rc = lfsck_opendb(mds_file, dbname, &mds_ostdb, 1, 0, 0); - if (rc != 0) { - log_write("failed to open mds db file %s: %s\n", - mds_file, db_strerror(rc)); - goto out; - } - - ost_hdr = malloc(sizeof(*ost_hdr)); - if (ost_hdr == NULL) { - log_write("Failure to alloc memory\n"); - rc = -ENOMEM; - goto out; - } - - - VERBOSE(2, "looking for index %u UUID %s\n", ost_idx, - lfsck_uuid[ost_idx].uuid); - - for (i = 0; i < num_ost_files; i++) { - VERBOSE(2, "checking file %s\n", ost_files[i]); - rc = lfsck_opendb(ost_files[i], OST_HDR, &ost_db, 0, 0, 0); - if 
(rc != 0) { - log_write("Error opening ost_data_file %s: rc %d\n", - ost_files[i], rc); - goto out; - } - memset(&key, 0, sizeof(key)); - memset(&data, 0, sizeof(data)); - ost_hdr->ost_magic = OST_MAGIC; - key.data = &ost_hdr->ost_magic; - key.size = sizeof(ost_hdr->ost_magic); - data.size = data.ulen = sizeof(*ost_hdr); - data.data = ost_hdr; - data.flags = DB_DBT_USERMEM; - - rc = ost_db->get(ost_db, NULL, &key, &data, 0); - ost_db->close(ost_db, 0); - ost_db = NULL; - if (rc != 0) { - log_write("Invalid ost magic on file %s: rc %s\n", - ost_files[i], db_strerror(rc)); - continue; - } - - letocpu_ost_hdr(ost_hdr); - VERBOSE(2, "%s has ost UUID %s\n", ost_files[i], - ost_hdr->ost_uuid.uuid); - - if (obd_uuid_equals(&lfsck_uuid[ost_idx], &ost_hdr->ost_uuid)) { - if (ost_hdr->ost_index != ost_idx) { - log_write("Requested ost_idx %u doesn't match " - "index %u found in %s\n", ost_idx, - ost_hdr->ost_index, ost_files[i]); - continue; - } - - break; - } - } - - if (i == num_ost_files) { - log_write("lfsck: can't find file for ost_idx %d\n", ost_idx); - rc = lfsck_list_affected_files(mds_file, mds_hdr, - mds_direntdb, ost_idx); - goto out; - } - rc = lfsck_opendb(ost_files[i], OST_OSTDB, &ost_db, 0, 0, 0); - if (rc != 0) { - log_write("error opening ost_data_file %s: rc %d\n", - ost_files[i], rc); - goto out; - } - - VERBOSE(1, "MDS: max_id "LPU64" OST: max_id "LPU64"\n", - mds_hdr->mds_max_ost_id[ost_idx], ost_hdr->ost_last_id); - - rc = lfsck_run_pass1(ost_idx, mds_ostdb, ost_db, mds_direntdb); - if (rc != 0) { - log_write("error in running pass1\n"); - goto out; - } - - rc = lfsck_run_pass2(ost_idx, mds_hdr, mds_ostdb, ost_db, mds_direntdb, - mds_sizeinfodb); - if (rc != 0) { - log_write("error in running pass2\n"); - goto out; - } - - last_id = (ost_hdr->ost_flags & E2F_OPT_READONLY || - mds_hdr->mds_flags & E2F_OPT_READONLY) ? 
- mds_hdr->mds_max_ost_id[ost_idx] : ost_hdr->ost_last_id; - - rc = lfsck_run_pass3(ost_idx, mds_ostdb, ost_db, ost_hdr->ost_uuid, - last_id); - if (rc != 0) { - log_write("error in running pass3\n"); - goto out; - } - rc = 0; - + for (j = 0; j < num_mdt_files; j++) { + rc = lfsck_opendb(mdt_files[j], dbname, &mds_ostdb, 1, 0, 0, DB_HASH); + if (rc != 0) { + log_write("failed to open mds db file %s: %s\n", + mdt_files[j], db_strerror(rc)); + goto out; + } + + rc = lfsck_opendb(mdt_files[j], MDS_HDR, &mdt_hdrdb, 0, 0, 0, DB_HASH); + if (rc != 0) { + log_write("failed to open mds db file %s: %s\n", + mdt_files[j], db_strerror(rc)); + goto out; + } + + rc = lfsck_get_mdt_hdr(mdt_hdrdb, mdt_hdr); + if (rc) { + log_write("Failure to get mdt_hdr %s \n", mdt_files[j]); + goto out; + } + mdt_hdrdb->close(mdt_hdrdb, 0); + mdt_hdrdb = NULL; + + VERBOSE(2, "looking for index %u UUID %s\n", ost_idx, + lfsck_uuid[ost_idx].uuid); + + for (i = 0; i < num_ost_files; i++) { + VERBOSE(2, "checking file %s\n", ost_files[i]); + rc = lfsck_opendb(ost_files[i], OST_HDR, &ost_db, 0, 0, 0, DB_HASH); + if (rc != 0) { + log_write("Error opening ost_data_file %s: rc %d\n", + ost_files[i], rc); + goto out; + } + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + ost_hdr->ost_magic = OST_MAGIC; + key.data = &ost_hdr->ost_magic; + key.size = sizeof(ost_hdr->ost_magic); + data.size = data.ulen = sizeof(*ost_hdr); + data.data = ost_hdr; + data.flags = DB_DBT_USERMEM; + + rc = ost_db->get(ost_db, NULL, &key, &data, 0); + ost_db->close(ost_db, 0); + ost_db = NULL; + if (rc != 0) { + log_write("Invalid ost magic on file %s: rc %s\n", + ost_files[i], db_strerror(rc)); + continue; + } + + letocpu_ost_hdr(ost_hdr); + VERBOSE(2, "%s has ost UUID %s\n", ost_files[i], + ost_hdr->ost_uuid.uuid); + + if (obd_uuid_equals(&lfsck_uuid[ost_idx], &ost_hdr->ost_uuid)) { + if (ost_hdr->ost_index != ost_idx) { + log_write("Requested ost_idx %u doesn't match " + "index %u found in %s\n", ost_idx, + 
ost_hdr->ost_index, ost_files[i]); + continue; + } + + break; + } + } + + if (i == num_ost_files) { + log_write("lfsck: can't find file for ost_idx %d\n", ost_idx); + rc = lfsck_list_affected_files(mdt_files[j], mdt_hdr, + mdt_info[j].mdt_direntdb, ost_idx); + goto out; + } + + sprintf(ost_dbname, "%s.%d", OST_OSTDB, mdt_hdr->mds_index); + rc = lfsck_opendb(ost_files[i], ost_dbname, &ost_db, 0, 0, 0, DB_HASH); + if (rc != 0) { + log_write("error opening ost_data_file %s: rc %d\n", + ost_files[i], rc); + goto out; + } + + /* FIXME */ + VERBOSE(1, "MDS%d: max_id "LPU64" OST: max_id "LPU64"\n", mdt_hdr->mds_index, + mdt_info[j].mdt_hdr->mds_max_ost_id[ost_idx], ost_hdr->ost_last_id[0]); + + rc = lfsck_run_pass1(ost_idx, mds_ostdb, ost_db, mdt_info[j].mdt_direntdb); + if (rc != 0) { + log_write("error in running pass1\n"); + goto out; + } + + rc = lfsck_run_pass2(ost_idx, mdt_info[j].mdt_hdr, mds_ostdb, ost_db, + mdt_info[j].mdt_direntdb, mdt_info[j].mdt_sizeinfodb); + if (rc != 0) { + log_write("error in running pass2\n"); + goto out; + } + + /* FIXME */ + last_id = (ost_hdr->ost_flags & E2F_OPT_READONLY || + mdt_info[j].mdt_hdr->mds_flags & E2F_OPT_READONLY) ? 
+ mdt_info[j].mdt_hdr->mds_max_ost_id[ost_idx] : + ost_hdr->ost_last_id[0]; + + rc = lfsck_run_pass3(ost_idx, mds_ostdb, ost_db, ost_hdr->ost_uuid, + last_id); + if (rc != 0) { + log_write("error in running pass3\n"); + goto out; + } + mds_ostdb->close(mds_ostdb, 0); + ost_db->close(ost_db, 0); + rc = 0; + mds_ostdb = ost_db = NULL; + } out: + if (mdt_hdr) + free(mdt_hdr); if (ost_hdr) free(ost_hdr); if (mds_ostdb) mds_ostdb->close(mds_ostdb, 0); if (ost_db) ost_db->close(ost_db, 0); + if (mdt_hdrdb) + mdt_hdrdb->close(mdt_hdrdb, 0); return(rc); } @@ -1465,14 +1549,14 @@ void llapi_cancel_osc_locks(const char * /* Duplicate an object that is referenced by multiple files and point one * of the files to use the duplicated object */ int lfsck_fix_duplicate(__u64 mds_fid, __u32 mds_generation, - __u32 ost_idx, __u64 ost_objid, DB *mds_direntdb) + __u32 ost_idx, __u64 ost_objid, DB *mdt_direntdb) { char path_tmp[PATH_MAX] = { 0 }, path[PATH_MAX] = { 0 }; char tmp[PATH_MAX * 2 + 10] = { 0 }; const char *base; int rc; - if (lfsck_get_path(mds_fid, mds_direntdb, path, sizeof(path))) { + if (lfsck_get_path(mds_fid, mdt_direntdb, path, sizeof(path))) { log_write("%s: [%u]: failed to locate FID "LPU64 " duplicate objid "LPU64"\n", progname, ost_idx, mds_fid, ost_objid); @@ -1546,12 +1630,11 @@ out: * Check for files found that reference the same ost objects * (found in pass1) and repair now if necessary */ -int lfsck_run_pass4(DB *mds_direntdb) +int lfsck_run_pass4(DB *mdt_direntdb) { char tmp[PATH_MAX + 512]; int i, j; - log_write("lfsck: pass4: check for duplicate object references\n"); if (lfsck_dup_saved == 0) { log_write("lfsck: pass4 OK (no duplicates)\n"); return(0); @@ -1565,7 +1648,7 @@ int lfsck_run_pass4(DB *mds_direntdb) lfsck_duplicates[i].mds_generation, lfsck_duplicates[i].ost_idx, lfsck_duplicates[i].objid, - mds_direntdb)) { + mdt_direntdb)) { fix_failed++; } @@ -1592,7 +1675,7 @@ int lfsck_run_pass4(DB *mds_direntdb) * This is a placeholder to check for 
filesize correctness no fixup is in * place right now since file size is still obtained from osts */ -int lfsck_run_pass5(DB *mds_direntdb, DB *mds_sizeinfodb) +int lfsck_run_pass5(DB *mdt_direntdb, DB *mdt_sizeinfodb) { int rc = 0; #ifdef CHECK_SIZE @@ -1602,9 +1685,8 @@ int lfsck_run_pass5(DB *mds_direntdb, DB DBT key,data; DBC *dbcp; - log_write("lfsck: pass5: file size correctness\n"); - if ((rc = mds_sizeinfodb->cursor(mds_sizeinfodb, NULL, &dbcp, 0)) != 0){ + if ((rc = mdt_sizeinfodb->cursor(mdt_sizeinfodb, NULL, &dbcp, 0)) != 0){ log_write("%s: error acquiring cursor for database: %s\n", progname, db_strerror(rc)); rc = -EINVAL; @@ -1619,7 +1701,7 @@ int lfsck_run_pass5(DB *mds_direntdb, DB letocpu_mds_szinfo(&mds_szinfo1); if (mds_szinfo1.mds_size != mds_szinfo1.mds_calc_size) { - if (lfsck_get_path(mds_szinfo1.mds_fid, mds_direntdb, + if (lfsck_get_path(mds_szinfo1.mds_fid, mdt_direntdb, path, sizeof(path))) { log_write("%s: failed to get path and update " "size for fid "LPU64"\n", @@ -1700,8 +1782,7 @@ void *lfsck_start_thread(void *arg) if (!all_started) pthread_exit(NULL); for (i = tinfo->start_ost_idx; i < tinfo->end_ost_idx; i++) { - rc = run_test(i, tinfo->mds_hdr, tinfo->mds_direntdb, - tinfo->mds_sizeinfodb); + rc = run_test(i, tinfo->mds_info); if (rc) { log_write("lfsck: ost_idx %d: error running check\n",i); tinfo->status = rc; @@ -1710,83 +1791,342 @@ void *lfsck_start_thread(void *arg) pthread_exit(NULL); } +static int lfsck_get_mdt_index(struct lfsck_mdt_check_info *mdt_info, + struct lu_fid *fid) +{ + DBT key, data; + int i, rc = 0; + struct lfsck_mds_fldb range; + + for (i = 0; i < num_mdt_files; i++) { + DB *fldb = mdt_info[i].mdt_fldb; + DBC *dbcp; + + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + data.data = &range; + data.size = data.ulen = sizeof(range); + data.flags = DB_DBT_USERMEM; + rc = fldb->cursor(fldb, NULL, &dbcp, 0); + if (rc) { + log_write("open mdt_fldb %s failed %d \n", + mdt_files[i], rc); + continue; + } + 
rc = dbcp->c_get(dbcp, &key, &data, DB_FIRST); + if (rc) { + log_write("get dbcp %s failed %s \n", + mdt_files[i], db_strerror(rc)); + dbcp->c_close(dbcp); + continue; + } + while ((rc = dbcp->c_get(dbcp, &key, &data, DB_NEXT)) == 0) { + if (fid->f_seq >= range.lsr_start && + fid->f_seq < range.lsr_end) { + dbcp->c_close(dbcp); + return range.lsr_index; + } + } + dbcp->c_close(dbcp); + } + + return -1; +} + +static int lfsck_locate_fid_in_oi(struct lfsck_mdt_check_info *mdt_info, + int index, struct lu_fid *fid) +{ + int i; + int ret = -1; + + for (i = 0; i < num_mdt_files; i++) { + if (index == mdt_info[i].mdt_hdr->mds_index) { + DB *oidb = mdt_info[i].mdt_oidb; + struct osd_inode_id inode; + DBT key, data; + + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + data.data = &inode; + data.size = data.ulen = sizeof(inode); + data.flags = DB_DBT_USERMEM; + key.data = fid; + key.size = sizeof(*fid); + ret = oidb->get(oidb, NULL, &key, &data, 0); + break; + } + } + return ret; +} + +static int lfsck_locate_fid(struct lfsck_mdt_check_info *mdt_info, + struct lu_fid *fid) +{ + int index; + int ret; + + index = lfsck_get_mdt_index(mdt_info, fid); + if (index == -1) { + fprintf(stderr, "round %d \n", index); + log_write("can not locate "DFID" in fldb \n", PFID(fid)); + return -1; + } + + /*Locate fid in oidb database */ + ret = lfsck_locate_fid_in_oi(mdt_info, index, fid); + return ret; +} + +static int lfsck_mdt_check_pass1(struct lfsck_mdt_check_info *mdt_info) +{ + int failed = 0; + int i; + + for (i = 0; i < num_mdt_files; i++) { + DB* mdt_dfiddb = mdt_info[i].mdt_dfiddb; + struct lu_fid mdt_fid; + DBT key, data; + DBC *dbcp; + int rc; + + rc = mdt_dfiddb->cursor(mdt_dfiddb, NULL, &dbcp, 0); + if (rc != 0) { + log_write("[%u]: error acquiring cursor for mds dfid table: %s\n", + i, db_strerror(rc)); + continue; + } + + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + data.data = &mdt_fid; + data.size = data.ulen = sizeof(mdt_fid); + 
data.flags = DB_DBT_USERMEM; + while ((rc = dbcp->c_get(dbcp, &key, &data, DB_NEXT)) == 0) { + + rc = lfsck_locate_fid(mdt_info, &mdt_fid); + if (rc != 0) { + failed = 1; + fprintf(stderr, "Can not veryfied "DFID": \n", + PFID(&mdt_fid)); + } + } + dbcp->c_close(dbcp); + if (failed) + fprintf(stderr, "MDS%d OIDB check failed \n", + mdt_info[i].mdt_hdr->mds_index); + else + fprintf(stderr, "MDS%d OIDB check pass \n", + mdt_info[i].mdt_hdr->mds_index); + } + if (failed) + return -1; + return 0; +} + +static int lfsck_mdt_check_stripe_ent(struct lfsck_mdt_check_info *mdt_info, DB *dbp) +{ + struct lfsck_mds_stripe_ent mdt_ent; + struct lu_fid fid; + DBC *dbcp; + DBT key, data; + int rc; + int failed = 0; + + rc = dbp->cursor(dbp, NULL, &dbcp, 0); + if (rc) { + log_write("Get cursor failed %s \n", db_strerror(rc)); + dbp->close(dbp, 0); + return rc; + } + + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + + key.data = &fid; + key.size = sizeof(fid); + data.data = &mdt_ent; + data.size = data.ulen = sizeof(mdt_ent); + data.flags = DB_DBT_USERMEM; + while ((rc = dbcp->c_get(dbcp, &key, &data, DB_NEXT)) == 0) { + rc = lfsck_locate_fid(mdt_info, &mdt_ent.mds_fid); + if (rc != 0) { + failed = 1; + fprintf(stderr, "MDT can not verify idx %d "DFID":"DFID": \n", + mdt_ent.mds_mdtidx, PFID(&mdt_ent.mds_mfid), + PFID(&mdt_ent.mds_fid)); + } + rc = lfsck_locate_fid(mdt_info, &mdt_ent.mds_mfid); + if (rc != 0) { + failed = 1; + fprintf(stderr, "MDT can not verify idx %d "DFID":"DFID": \n", + mdt_ent.mds_mdtidx, PFID(&mdt_ent.mds_mfid), + PFID(&mdt_ent.mds_fid)); + } + } + dbcp->c_close(dbcp); + + if (failed) + return -1; + return 0; +} + +static int lfsck_mdt_check_stripedb(struct lfsck_mdt_check_info *mdt_info, + char *dbname) +{ + int i, failed = 0; + DB * dbp; + + for (i = 0; i < num_mdt_files; i++) { + int rc; + + rc = lfsck_opendb(mdt_files[i], dbname, &dbp, 1, 0, 0, DB_HASH); + if (rc) { + failed = 1; + continue; + } + + rc = 
lfsck_mdt_check_stripe_ent(mdt_info, dbp); + if (rc) { + failed = 1; + continue; + } + dbp->close(dbp, 0); + } + + if (failed) + log_write("MDT striped check failed. \n"); + else + log_write("MDT striped check pass! \n"); + + if (failed) + return -1; + + return 0; +} + +static int lfsck_mdt_check_pass2(struct lfsck_mdt_check_info *mdt_info) +{ + return lfsck_mdt_check_stripedb(mdt_info, MDS_MDTDB); +} + +static int lfsck_mdt_check_pass3(struct lfsck_mdt_check_info *mdt_info) +{ + return lfsck_mdt_check_stripedb(mdt_info, MDS_OBJDB); +} + +/* multiple-MDT checks and repair */ +static int lfsck_check_mdts(struct lfsck_mdt_check_info *mdt_info) +{ + int rc = 0; + + /* For single MDT, just return */ + if (num_mdt_files <= 1) + return rc; + + log_write("mds_lfsck: pass1: check cross-ref inode of MDT\n"); + lfsck_mdt_check_pass1(mdt_info); + + log_write("mds_lfsck: pass2: check for striped_dir MDT\n"); + lfsck_mdt_check_pass2(mdt_info); + + log_write("mds_lfsck: pass3: check for slave objects MDT\n"); + lfsck_mdt_check_pass3(mdt_info); + + return 0; +} + /* Start threads and run filesystem checks and repair */ int lfsck_run_checks() { - struct lfsck_mds_hdr *mds_hdr = NULL; + struct lfsck_mdt_check_info mdt_info[LMV_MAX_MDTS] = { {NULL} }; struct lfsck_thread_info *tinfo = NULL; pthread_t *threads = NULL; int rc, i; - DB *mds_direntdb = NULL; - DB *mds_hdrdb = NULL; - DB *mds_sizeinfodb = NULL; - DBT key, data; - int num_osts; + int num_osts = 0; - rc = lfsck_opendb(mds_file, MDS_HDR, &mds_hdrdb, 0, 0, 0); - if (rc != 0) { - log_write("%s: error opening mds_hdr in %s: rc %d\n", - mds_file, rc); - return(-EINVAL); - } - mds_hdr = malloc(sizeof(*mds_hdr)); - if (mds_hdr == NULL) { - log_write("%s: out of memory allocating DB header (%u)\n", - progname, sizeof(*mds_hdr)); - rc = -ENOMEM; - goto out; - } - memset(&key, 0, sizeof(key)); - memset(&data, 0, sizeof(data)); - mds_hdr->mds_magic = MDS_MAGIC; - key.data = &mds_hdr->mds_magic; - key.size = 
sizeof(mds_hdr->mds_magic); - data.data = mds_hdr; - data.size = sizeof(*mds_hdr); - data.ulen = sizeof(*mds_hdr); - data.flags = DB_DBT_USERMEM; - rc = mds_hdrdb->get(mds_hdrdb, NULL, &key, &data, 0); - if (rc != 0) { - log_write("%s: error getting mds_hdr info %s: %s\n", - progname, mds_file, db_strerror(rc)); - goto out; - } - letocpu_mds_hdr(mds_hdr); - - rc = lfsck_opendb(mds_file, MDS_DIRINFO, &mds_direntdb, 0, 0, 0); - if (rc != 0) { - log_write("%s: error opening dirinfo db %s\n", - progname, mds_file); - goto out; - } - - rc = lfsck_opendb(mds_file, MDS_SIZEINFO, &mds_sizeinfodb, 0, 0, 0); - if (rc != 0) { - log_write("%s: error opening sizeinfo db %s\n", - progname, mds_file); - goto out; - } - - if (lov_tgt_count > mds_hdr->mds_num_osts) { - fprintf(stderr, "%s: number of osts in lov (%u) > " - "num referenced in mds (%u) (new ost or " - "empty filesystem?)\n", progname, - lov_tgt_count, mds_hdr->mds_num_osts); - fprintf(stderr, "Do you wish to continue? (y/n)\n"); - if ((rc = get_response()) != 1) { - log_write("%s: exiting \n", progname); - goto out; - } - fprintf(stderr, "\n"); - - num_osts = lov_tgt_count; - } else { - num_osts = mds_hdr->mds_num_osts; - } - if (num_threads > num_osts) - num_threads = num_osts; - - tinfo = calloc(num_threads, sizeof(*tinfo)); + /* Check MDTs */ + for (i = 0; i < num_mdt_files; i++) { + rc = lfsck_opendb(mdt_files[i], MDS_HDR, &mdt_info[i].mdt_hdrdb, + 0, 0, 0, DB_HASH); + if (rc != 0) { + log_write("error opening mdt_hdr in %s: rc %d\n", + mdt_files[i], rc); + return(-EINVAL); + } + rc = lfsck_opendb(mdt_files[i], MDS_FLDB, &mdt_info[i].mdt_fldb, + 0, 0, 0, DB_BTREE); + if (rc) { + log_write("error opening mds_fldb in %s: rc %d \n", + mdt_files[i], rc); + goto out; + } + rc = lfsck_opendb(mdt_files[i], MDS_OIDB, &mdt_info[i].mdt_oidb, + 0, 0, 0, DB_HASH); + if (rc) { + log_write("error opening mds_oidb in %s: rc %d \n", + mdt_files[i], rc); + goto out; + } + rc = lfsck_opendb(mdt_files[i], MDS_DFIDDB, 
&mdt_info[i].mdt_dfiddb, + 0, 0, 0, DB_HASH); + if (rc) { + log_write("error opening mds_oidb in %s: rc %d \n", + mdt_files[i], rc); + goto out; + } + mdt_info[i].mdt_hdr = malloc(sizeof(struct lfsck_mds_hdr)); + if (mdt_info[i].mdt_hdr == NULL) { + log_write("%s: out of memory allocating DB header (%u)\n", + progname, sizeof(struct lfsck_mds_hdr)); + rc = -ENOMEM; + goto out; + } + rc = lfsck_get_mdt_hdr(mdt_info[i].mdt_hdrdb, mdt_info[i].mdt_hdr); + if (lov_tgt_count > mdt_info[i].mdt_hdr->mds_num_osts) { + fprintf(stderr, "%s: number of osts in lov (%u) > " + "num referenced in mds (%u) (new ost or " + "empty filesystem?)\n", progname, + lov_tgt_count, mdt_info[i].mdt_hdr->mds_num_osts); + fprintf(stderr, "Do you wish to continue? (y/n)\n"); + if ((rc = get_response()) != 1) { + log_write("%s: exiting \n", progname); + goto out; + } + fprintf(stderr, "\n"); + num_osts = lov_tgt_count; + } else { + num_osts = mdt_info[i].mdt_hdr->mds_num_osts; + } + if (num_threads > num_osts) + num_threads = num_osts; + } + + rc = lfsck_check_mdts(&mdt_info[0]); + + for (i = 0; i < num_mdt_files; i++) { + if (mdt_info[i].mdt_fldb != NULL) { + mdt_info[i].mdt_fldb->close(mdt_info[i].mdt_fldb, 0); + mdt_info[i].mdt_fldb = NULL; + } + if (mdt_info[i].mdt_oidb != NULL) { + mdt_info[i].mdt_oidb->close(mdt_info[i].mdt_oidb, 0); + mdt_info[i].mdt_oidb = NULL; + } + if (mdt_info[i].mdt_dfiddb != NULL) { + mdt_info[i].mdt_dfiddb->close(mdt_info[i].mdt_dfiddb, 0); + mdt_info[i].mdt_dfiddb = NULL; + } + if (mdt_info[i].mdt_hdrdb != NULL) { + mdt_info[i].mdt_hdrdb->close(mdt_info[i].mdt_hdrdb, 0); + mdt_info[i].mdt_hdrdb = NULL; + } + } + if (rc) + fprintf(stderr, "MDS-MDS consistency check failed \n"); + else + fprintf(stderr, "MDS-MDS consistency check succeed \n"); + + tinfo = calloc(num_threads, sizeof(*tinfo)); if (tinfo == NULL) { log_write("%s: out of memory for thread info\n", progname); rc = -ENOMEM; @@ -1799,6 +2139,23 @@ int lfsck_run_checks() goto out; } + for (i = 0; i < 
num_mdt_files; i++) { + rc = lfsck_opendb(mdt_files[i], MDS_DIRINFO, &mdt_info[i].mdt_direntdb, + 0, 0, 0, DB_HASH); + if (rc != 0) { + log_write("%s: error opening dirinfo db %s\n", + progname, mdt_files[i]); + goto out; + } + + rc = lfsck_opendb(mdt_files[i], MDS_SIZEINFO, &mdt_info[i].mdt_sizeinfodb, + 0, 0, 0, DB_HASH); + if (rc != 0) { + log_write("%s: error opening sizeinfo db %s\n", + progname, mdt_files[i]); + goto out; + } + } all_started = 0; for (i = 0; i < num_threads; i++) { __u32 end_ost_idx; @@ -1807,14 +2164,11 @@ int lfsck_run_checks() chunk = num_osts / num_threads; if (num_osts % num_threads) chunk++; - tinfo[i].mds_hdr = mds_hdr; - tinfo[i].mds_direntdb = mds_direntdb; - tinfo[i].mds_sizeinfodb = mds_sizeinfodb; + tinfo[i].mds_info = &mdt_info[0]; tinfo[i].status = 0; tinfo[i].start_ost_idx = (chunk) * i; end_ost_idx = (chunk) * (i + 1); - end_ost_idx = end_ost_idx > num_osts ? - num_osts : end_ost_idx; + end_ost_idx = end_ost_idx > num_osts ? num_osts : end_ost_idx; tinfo[i].end_ost_idx = end_ost_idx; rc = pthread_create(&threads[i], NULL, lfsck_start_thread, &tinfo[i]); @@ -1843,26 +2197,37 @@ int lfsck_run_checks() } } - rc = lfsck_run_pass4(mds_direntdb); - if (rc != 0) - goto out; - - rc = lfsck_run_pass5(mds_direntdb, mds_sizeinfodb); - + for (i = 0; i < num_mdt_files; i++) { + log_write("lfsck : pass4: check for duplicate object references MDT%d\n", + mdt_info[i].mdt_hdr->mds_index); + rc = lfsck_run_pass4(mdt_info[i].mdt_direntdb); + if (rc != 0) + goto out; + log_write("lfsck: pass5: file size correctness MDT%d\n", + mdt_info[i].mdt_hdr->mds_index); + rc = lfsck_run_pass5(mdt_info[i].mdt_direntdb, + mdt_info[i].mdt_sizeinfodb); + } out: if (threads) free(threads); if (tinfo) free(tinfo); - if (mds_hdr) - free(mds_hdr); - if (mds_direntdb) - mds_direntdb->close(mds_direntdb, 0); - if (mds_hdrdb) - mds_hdrdb->close(mds_hdrdb, 0); - if (mds_sizeinfodb) - mds_sizeinfodb->close(mds_sizeinfodb, 0); + for (i = 0; i < num_mdt_files; i++) { + if 
(mdt_info[i].mdt_fldb != NULL) + mdt_info[i].mdt_fldb->close(mdt_info[i].mdt_fldb, 0); + if (mdt_info[i].mdt_oidb != NULL) + mdt_info[i].mdt_oidb->close(mdt_info[i].mdt_oidb, 0); + if (mdt_info[i].mdt_dfiddb != NULL) + mdt_info[i].mdt_dfiddb->close(mdt_info[i].mdt_dfiddb, 0); + if (mdt_info[i].mdt_hdrdb != NULL) + mdt_info[i].mdt_hdrdb->close(mdt_info[i].mdt_hdrdb, 0); + if (mdt_info[i].mdt_direntdb) + mdt_info[i].mdt_direntdb->close(mdt_info[i].mdt_direntdb, 0); + if (mdt_info[i].mdt_sizeinfodb) + mdt_info[i].mdt_sizeinfodb->close(mdt_info[i].mdt_sizeinfodb, 0); + } return(rc); } @@ -1956,12 +2321,16 @@ int main(int argc, char *argv[]) if (lfsck_run_checks()) log_close(-1); - if (mds_file) - free(mds_file); for (i = 0; i < LOV_MAX_OSTS; i++) { if (ost_files[i]) free(ost_files[i]); } + + for (i = 0; i < LMV_MAX_MDTS; i++) { + if (mdt_files[i]) + free(mdt_files[i]); + } + if (lfsck_duplicates) free(lfsck_duplicates); diff -up old/e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck_common.c ./e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck_common.c --- old/e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck_common.c 2010-10-08 03:48:33.000000000 -0700 +++ ./e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck_common.c 2011-01-09 22:56:36.814242470 -0800 @@ -101,7 +101,8 @@ int lfsck_create_dbenv(const char *progn } int lfsck_opendb(const char *fname, const char *dbname, DB **dbpp, - int allow_dup, int keydata_size, int num_files) + int allow_dup, int keydata_size, int num_files, + DBTYPE type) { static int dbenv_set = 0; DB *dbp; @@ -134,26 +135,27 @@ int lfsck_opendb(const char *fname, cons return (EIO); } - if (keydata_size && num_files) { - h_ffactor = (pagesize - 32) / (keydata_size + 8); - if ((rc = dbp->set_h_ffactor(dbp, h_ffactor)) != 0) { - dbp->err(dbp, rc, "set_h_ffactor"); - dbp->close(dbp, 0); - return (EIO); + if (type == DB_HASH) { + if (keydata_size && num_files) { + h_ffactor = (pagesize - 32) / (keydata_size + 8); + if ((rc = dbp->set_h_ffactor(dbp, h_ffactor)) != 0) { + dbp->err(dbp, rc, 
"set_h_ffactor"); + dbp->close(dbp, 0); + return (EIO); + } + if ((rc = dbp->set_h_nelem(dbp, num_files)) != 0 ) { + dbp->err(dbp, rc, "set_h_nelem"); + dbp->close(dbp, 0); + return (EIO); + } } - if ((rc = dbp->set_h_nelem(dbp, num_files)) != 0 ) { - dbp->err(dbp, rc, "set_h_nelem"); + + if ((rc = dbp->set_h_hash(dbp, lfsck_hash_fn)) != 0 ) { + dbp->err(dbp, rc, "set_h_hash"); dbp->close(dbp, 0); return (EIO); } } - - if ((rc = dbp->set_h_hash(dbp, lfsck_hash_fn)) != 0 ) { - dbp->err(dbp, rc, "set_h_hash"); - dbp->close(dbp, 0); - return (EIO); - } - if (allow_dup) { if((rc = dbp->set_flags(dbp, DB_DUPSORT)) != 0) { fprintf(stderr, "Failure to allow duplicates\n"); @@ -161,12 +163,12 @@ int lfsck_opendb(const char *fname, cons return (EIO); } } - + #if (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 1) || (DB_VERSION_MAJOR > 4) - if ((rc = dbp->open(dbp, NULL, fname, dbname, DB_HASH, + if ((rc = dbp->open(dbp, NULL, fname, dbname, type, DB_CREATE | DB_INIT_LOCK | DB_THREAD, 0664)) != 0) #else - if ((rc = dbp->open(dbp, fname, dbname, DB_HASH, + if ((rc = dbp->open(dbp, fname, dbname, type, DB_CREATE | DB_INIT_LOCK | DB_THREAD, 0664)) != 0) #endif { @@ -185,6 +187,7 @@ void cputole_mds_hdr(struct lfsck_mds_hd mds_hdr->mds_flags = ext2fs_cpu_to_le64(mds_hdr->mds_flags); mds_hdr->mds_max_files = ext2fs_cpu_to_le64(mds_hdr->mds_max_files); mds_hdr->mds_num_osts = ext2fs_cpu_to_le64(mds_hdr->mds_num_osts); + mds_hdr->mds_index = ext2fs_cpu_to_le32(mds_hdr->mds_index); for (i = 0; i < num_osts; i++) { mds_hdr->mds_max_ost_id[i] = ext2fs_cpu_to_le64(mds_hdr->mds_max_ost_id[i]); @@ -199,6 +202,7 @@ void letocpu_mds_hdr(struct lfsck_mds_hd mds_hdr->mds_flags = ext2fs_le64_to_cpu(mds_hdr->mds_flags); mds_hdr->mds_max_files = ext2fs_le64_to_cpu(mds_hdr->mds_max_files); mds_hdr->mds_num_osts = ext2fs_le64_to_cpu(mds_hdr->mds_num_osts); + mds_hdr->mds_index = ext2fs_le32_to_cpu(mds_hdr->mds_index); for (i = 0; i < mds_hdr->mds_num_osts; i ++) { mds_hdr->mds_max_ost_id[i] = 
ext2fs_le64_to_cpu(mds_hdr->mds_max_ost_id[i]); @@ -207,18 +211,24 @@ void letocpu_mds_hdr(struct lfsck_mds_hd void cputole_ost_hdr(struct lfsck_ost_hdr *ost_hdr) { + int i; ost_hdr->ost_magic = ext2fs_cpu_to_le64(ost_hdr->ost_magic); ost_hdr->ost_flags = ext2fs_cpu_to_le64(ost_hdr->ost_flags); ost_hdr->ost_num_files = ext2fs_cpu_to_le64(ost_hdr->ost_num_files); - ost_hdr->ost_last_id = ext2fs_cpu_to_le64(ost_hdr->ost_last_id); + for (i = 0; i < ost_hdr->ost_mds_num; i++) + ost_hdr->ost_last_id[i] = ext2fs_cpu_to_le64(ost_hdr->ost_last_id[i]); + ost_hdr->ost_mds_num = ext2fs_cpu_to_le32(ost_hdr->ost_mds_num); } void letocpu_ost_hdr(struct lfsck_ost_hdr *ost_hdr) { + int i; ost_hdr->ost_magic = ext2fs_le64_to_cpu(ost_hdr->ost_magic); ost_hdr->ost_flags = ext2fs_le64_to_cpu(ost_hdr->ost_flags); ost_hdr->ost_num_files = ext2fs_le64_to_cpu(ost_hdr->ost_num_files); - ost_hdr->ost_last_id = ext2fs_le64_to_cpu(ost_hdr->ost_last_id); + ost_hdr->ost_mds_num = ext2fs_le32_to_cpu(ost_hdr->ost_mds_num); + for (i = 0; i < ost_hdr->ost_mds_num; i++) + ost_hdr->ost_last_id[i] = ext2fs_le64_to_cpu(ost_hdr->ost_last_id[i]); } void cputole_mds_dirent(struct lfsck_mds_dirent *mds_dirent) @@ -281,6 +291,14 @@ void letocpu_mds_objent(struct lfsck_mds mds_objent->mds_ostoffset = ext2fs_le32_to_cpu(mds_objent->mds_ostoffset); } +void cputole_mds_stripe_ent(struct lfsck_mds_stripe_ent *mds_objent) +{ + mds_objent->mds_fid.f_seq = ext2fs_cpu_to_le64(mds_objent->mds_fid.f_seq); + mds_objent->mds_fid.f_oid = ext2fs_cpu_to_le32(mds_objent->mds_fid.f_oid); + mds_objent->mds_fid.f_ver = ext2fs_cpu_to_le32(mds_objent->mds_fid.f_ver); + mds_objent->mds_mdtidx = ext2fs_cpu_to_le32(mds_objent->mds_mdtidx); +} + void cputole_ost_objent(struct lfsck_ost_objent *ost_objent) { ost_objent->ost_objid = ext2fs_cpu_to_le64(ost_objent->ost_objid); @@ -307,7 +325,7 @@ void letocpu_lov_user_md(struct lov_user lmm->lmm_magic = ext2fs_le32_to_cpu(lmm->lmm_magic); lmm->lmm_pattern = 
ext2fs_le32_to_cpu(lmm->lmm_pattern); lmm->lmm_object_id = ext2fs_le64_to_cpu(lmm->lmm_object_id); - lmm->lmm_object_gr = ext2fs_le64_to_cpu(lmm->lmm_object_gr); + lmm->lmm_object_seq = ext2fs_le64_to_cpu(lmm->lmm_object_seq); lmm->lmm_stripe_size = ext2fs_le32_to_cpu(lmm->lmm_stripe_size); lmm->lmm_stripe_count = ext2fs_le16_to_cpu(lmm->lmm_stripe_count); /* No swabbing needed for the lov_user_md_v3 lmm_pool_name */ @@ -321,9 +339,33 @@ void letocpu_lov_user_md(struct lov_user for (i = 0; i < lmm->lmm_stripe_count; i++, loi++) { loi->l_object_id = ext2fs_le64_to_cpu(loi->l_object_id); - loi->l_object_gr = ext2fs_le64_to_cpu(loi->l_object_gr); + loi->l_object_seq = ext2fs_le64_to_cpu(loi->l_object_seq); loi->l_ost_gen = ext2fs_le32_to_cpu(loi->l_ost_gen); loi->l_ost_idx = ext2fs_le32_to_cpu(loi->l_ost_idx); } } + +void letocpu_lmv_user_md(struct lmv_user_md *lmm) +{ + struct lmv_user_mds_data *lmi; + int i; + + lmm->lum_magic = ext2fs_le32_to_cpu(lmm->lum_magic); + lmm->lum_stripe_count = ext2fs_le32_to_cpu(lmm->lum_stripe_count); + lmm->lum_stripe_offset = ext2fs_le32_to_cpu(lmm->lum_stripe_offset); + lmm->lum_hash_type = ext2fs_le32_to_cpu(lmm->lum_hash_type); + lmm->lum_type = ext2fs_le32_to_cpu(lmm->lum_type); + + lmi = lmm->lum_objects; + /* If there is a bad magic, this will be found immediately in the + * call to lfsck_check_lmv_ea() following this function. 
*/ + + for (i = 0; i < lmm->lum_stripe_count; i++, lmi++) { + lmi->lum_fid.f_seq = ext2fs_le64_to_cpu(lmi->lum_fid.f_seq); + lmi->lum_fid.f_oid = ext2fs_le32_to_cpu(lmi->lum_fid.f_oid); + lmi->lum_fid.f_ver = ext2fs_le32_to_cpu(lmi->lum_fid.f_ver); + lmi->lum_mds = ext2fs_le32_to_cpu(lmi->lum_mds); + } +} + #endif Only in ./e2fsprogs-1.41.12.2.ora1/e2fsck/: lfsck_common.o diff -up old/e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck.h ./e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck.h --- old/e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck.h 2010-10-08 03:48:33.000000000 -0700 +++ ./e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck.h 2011-01-14 17:01:46.912165809 -0800 @@ -41,19 +41,27 @@ #define LAST_ID "LAST_ID" #define LAST_RCVD "last_rcvd" #define LOV_OBJID "lov_objid" +#define FIRST_MDT_GROUP 3 #ifndef EXT3_XATTR_INDEX_TRUSTED /* temporary until we hit l28 kernel */ #define EXT3_XATTR_INDEX_TRUSTED 4 #endif #define XATTR_LUSTRE_MDS_LOV_EA "lov" +#define XATTR_LUSTRE_MDS_LMV_EA "lmv" /* Database names */ #define MDS_HDR "mdshdr" #define MDS_DIRINFO "mds_dirinfo" #define MDS_SIZEINFO "mds_sizeinfo" #define MDS_OSTDB "mds_ostdb" +#define MDS_MDTDB "mds_mdtdb" +#define MDS_FLDB "mds_fldb" +#define MDS_OIDB "mds_oidb" +#define MDS_OBJDB "mds_obj" #define OST_HDR "osthdr" #define OST_OSTDB "ost_db" +#define MDS_DFIDDB "mds_dfiddb" + #define MDS_MAGIC 0xDBABCD01 #define OST_MAGIC 0xDB123402 @@ -68,6 +76,9 @@ #define LOV_EA_SIZE(lum, num) (sizeof(*lum) + num * sizeof(*lum->lmm_objects)) #define LOV_EA_MAX(lum) LOV_EA_SIZE(lum, LOV_MAX_OSTS) + +#define LMV_MAX_MDTS 8 + /*XXX*/ #define STRTOUL strtoul #define STRTOUL_MAX ULONG_MAX @@ -99,6 +110,7 @@ struct lfsck_mds_hdr { __u64 mds_max_files; __u32 mds_num_osts; __u32 mds_unused; + __u32 mds_index; __u64 mds_max_ost_id[LOV_MAX_OSTS]; struct obd_uuid mds_uuid; struct obd_uuid mds_ost_info[LOV_MAX_OSTS]; @@ -108,9 +120,10 @@ struct lfsck_ost_hdr { __u64 ost_magic; __u64 ost_flags; __u64 ost_num_files; - __u64 ost_last_id; + __u64 ost_last_id[LMV_MAX_MDTS]; __u32 
ost_index; __u32 ost_unused; + __u32 ost_mds_num; struct obd_uuid ost_mds_uuid; struct obd_uuid ost_uuid; }; @@ -143,6 +156,19 @@ struct lfsck_mds_objent { __u32 mds_ostoffset; }; +struct lfsck_mds_stripe_ent { + __u32 mds_mdtidx; + struct lu_fid mds_mfid; + struct lu_fid mds_fid; +}; + +struct lfsck_mds_fldb { + __u64 lsr_start; + __u64 lsr_end; + __u32 lsr_index; + __u32 lsr_flags; +}; + struct lfsck_ost_objent { __u64 ost_objid; __u64 ost_group; @@ -159,11 +185,20 @@ struct lfsck_ofile_ctx { struct lfsck_outdb_info { __u32 ost_count; + __u32 mdt_count; int have_ost_count; DB *mds_sizeinfo_dbp; + DB *mds_dirfid_dbp; + DB *mds_dirstripe_dbp; struct lfsck_ofile_ctx *ofile_ctx; }; +struct osd_inode_id { + __u32 oii_ino; /* inode number */ + __u32 oii_gen; /* inode generation */ +}; + +typedef __u64 seqno_t; /* pass6.c */ extern int e2fsck_lfsck_found_ea(e2fsck_t ctx, ext2_ino_t ino, struct ext2_inode_large *inode, @@ -171,11 +206,15 @@ extern int e2fsck_lfsck_found_ea(e2fsck_ extern int e2fsck_lfsck_flush_ea(e2fsck_t ctx); extern int e2fsck_lfsck_cleanupdb(e2fsck_t ctx); extern int e2fsck_lfsck_remove_pending(e2fsck_t ctx, char *block_buf); +extern int e2fsck_lfsck_put_fid(e2fsck_t ctx, struct ext2_dir_entry_2 *de); +extern int e2fsck_lfsck_flush_fid(e2fsck_t ctx); /* lfsck_common.c */ + extern int lfsck_create_dbenv(const char *progname); extern int lfsck_opendb(const char *fname, const char *dbname, DB **dbpp, - int allow_dup, int keydata_size, int num_files); + int allow_dup, int keydata_size, int num_files, + DBTYPE type); extern void cputole_mds_hdr(struct lfsck_mds_hdr *mds_hdr); extern void letocpu_mds_hdr(struct lfsck_mds_hdr *mds_hdr); extern void cputole_ost_hdr(struct lfsck_ost_hdr *ost_hdr); @@ -189,6 +228,9 @@ extern void letocpu_mds_objent(struct lf extern void cputole_ost_objent(struct lfsck_ost_objent *ost_objent); extern void letocpu_ost_objent(struct lfsck_ost_objent *ost_objent); extern void letocpu_lov_user_md(struct lov_user_md *lmm); +extern 
void letocpu_lmv_user_md(struct lmv_user_md *lmv); + +extern void cputole_mds_stripe_ent(struct lfsck_mds_stripe_ent *mds_objent); #define MDS_START_DIRENT_TABLE sizeof(struct lfsck_mds_hdr) diff -up old/e2fsprogs-1.41.12.2.ora1/e2fsck/pass1.c ./e2fsprogs-1.41.12.2.ora1/e2fsck/pass1.c --- old/e2fsprogs-1.41.12.2.ora1/e2fsck/pass1.c 2010-10-08 03:48:33.000000000 -0700 +++ ./e2fsprogs-1.41.12.2.ora1/e2fsck/pass1.c 2011-01-15 20:50:43.366721207 -0800 @@ -521,8 +521,8 @@ static void check_inode_extra_space(e2fs } if (EXT4_FITS_IN_INODE(inode, inode, i_crtime) && - inode->i_crtime < sb->s_mkfs_time || - inode->i_crtime > ctx->now + ctx->now_tolerance) + (inode->i_crtime < sb->s_mkfs_time || + inode->i_crtime > ctx->now + ctx->now_tolerance)) e2fsck_mark_inode_bad(ctx, pctx->ino, BADNESS_HIGH); eamagic = IHDR(inode); diff -up old/e2fsprogs-1.41.12.2.ora1/e2fsck/pass2.c ./e2fsprogs-1.41.12.2.ora1/e2fsck/pass2.c --- old/e2fsprogs-1.41.12.2.ora1/e2fsck/pass2.c 2010-10-08 03:48:33.000000000 -0700 +++ ./e2fsprogs-1.41.12.2.ora1/e2fsck/pass2.c 2011-01-09 21:48:46.967778850 -0800 @@ -46,6 +46,7 @@ #include "e2fsck.h" #include "problem.h" #include "dict.h" +#include "lfsck.h" #ifdef NO_INLINE_FUNCS #define _INLINE_ @@ -141,6 +142,7 @@ void e2fsck_pass2(e2fsck_t ctx) cd.pctx.errcode = ext2fs_dblist_iterate(fs->dblist, check_dir_block, &cd); + e2fsck_lfsck_flush_fid(ctx); if (ctx->flags & E2F_FLAG_SIGNAL_MASK || ctx->flags & E2F_FLAG_RESTART) return; @@ -377,12 +379,12 @@ int e2fsck_check_dirent_data(e2fsck_t ct return 1; } if (de->file_type & ~EXT2_FT_MASK) { - if (de->rec_len >= EXT2_DIR_REC_LEN(de) || (de->rec_len + offset == EXT2_BLOCK_SIZE(ctx->fs->super))) { if (ext2_get_dirent_dirdata_size(de, EXT2_DIRENT_LUFID) == - EXT2_DIRENT_LUFID_SIZE) + EXT2_DIRENT_LUFID_SIZE) { return 0; + } } /* just clear dirent data flags for now, we should fix FID data * in lustre specific pass. 
@@ -972,6 +974,9 @@ out_htree: if (ret == 2) dir_modified++; + if (ret == 0) + e2fsck_lfsck_put_fid(ctx, (struct ext2_dir_entry_2 *)dirent); + /* * Make sure the inode listed is a legal one. */ @@ -1052,7 +1057,7 @@ out_htree: if (ctx->flags & E2F_FLAG_SIGNAL_MASK) return DIRENT_ABORT; } - + group = ext2fs_group_of_ino(fs, dirent->inode); first_unused_inode = group * fs->super->s_inodes_per_group + 1 + fs->super->s_inodes_per_group - diff -up old/e2fsprogs-1.41.12.2.ora1/e2fsck/pass6.c ./e2fsprogs-1.41.12.2.ora1/e2fsck/pass6.c --- old/e2fsprogs-1.41.12.2.ora1/e2fsck/pass6.c 2010-10-08 03:48:33.000000000 -0700 +++ ./e2fsprogs-1.41.12.2.ora1/e2fsck/pass6.c 2011-01-16 10:11:50.223886520 -0800 @@ -52,8 +52,30 @@ struct lfsck_ost_ctx { int numfiles; int status; __u64 max_objid; + __u64 mds_group; }; +static inline void e2fsck_fid_le_to_cpu(struct lu_fid *dst, struct lu_fid *src) +{ + dst->f_seq = ext2fs_le64_to_cpu(src->f_seq); + dst->f_oid = ext2fs_le32_to_cpu(src->f_oid); + dst->f_ver = ext2fs_le32_to_cpu(src->f_ver); +} + +static inline void e2fsck_fid_cpu_to_le(struct lu_fid *dst, struct lu_fid *src) +{ + dst->f_seq = ext2fs_cpu_to_le64(src->f_seq); + dst->f_oid = ext2fs_cpu_to_le32(src->f_oid); + dst->f_ver = ext2fs_cpu_to_le32(src->f_ver); +} + +static inline void e2fsck_fid_be_to_cpu(struct lu_fid *dst, struct lu_fid *src) +{ + dst->f_seq = ext2fs_be64_to_cpu(src->f_seq); + dst->f_oid = ext2fs_be32_to_cpu(src->f_oid); + dst->f_ver = ext2fs_be32_to_cpu(src->f_ver); +} + int e2fsck_lfsck_cleanupdb(e2fsck_t ctx) { int i; @@ -146,11 +168,17 @@ static int lfsck_write_mds_hdrinfo(e2fsc DB *mds_hdrdb = NULL; DBT key, data; int rc = 0; + char *mdsdb; int i; mds_hdrname = e2fsck_allocate_memory(ctx, PATH_MAX, "mds_hdr filename"); - sprintf(mds_hdrname, "%s.mdshdr",ctx->lustre_mdsdb); + /* lfsck_write_mds_hdrinfo can only be called when checking MDS, + * and only one MDS can be checked each time, so + * lustre_mds_files == 1 */ + assert(ctx->lustre_mds_files == 1); + mdsdb 
= ctx->lustre_mdsdb[0]; + sprintf(mds_hdrname, "%s.mdshdr",mdsdb); if (unlink(mds_hdrname)) { if (errno != ENOENT) { @@ -161,7 +189,7 @@ static int lfsck_write_mds_hdrinfo(e2fsc } } - rc = lfsck_opendb(mds_hdrname, MDS_HDR, &mds_hdrdb, 0, 0, 0); + rc = lfsck_opendb(mds_hdrname, MDS_HDR, &mds_hdrdb, 0, 0, 0, DB_HASH); if (rc != 0) { fprintf(stderr, "failure to open database for mdsdhr " "info%s: %s\n", MDS_HDR, db_strerror(rc)); @@ -213,61 +241,148 @@ out: return (rc); } -static int e2fsck_lfsck_save_ea(e2fsck_t ctx, ext2_ino_t ino, __u32 generation, - struct lov_user_md *lmm) +static int e2fsck_lfsck_init_oinfo(e2fsck_t ctx) { ext2_filsys fs = ctx->fs; - struct lfsck_mds_szinfo szinfo; - struct lov_user_ost_data_v1 *loi; - __u64 mds_fid; - int rc, i; - DBT key, data; - DB *dbp; + int rc; __u32 numfiles = fs->super->s_inodes_count - fs->super->s_free_inodes_count; + char *mdsdb; - if (!ctx->lfsck_oinfo) { - /* remove old db file */ - if (unlink(ctx->lustre_mdsdb)) { - rc = errno; - if (rc != ENOENT) { - fprintf(stderr,"Error removing old db %s: %s\n", - ctx->lustre_mdsdb, strerror(rc)); - ctx->flags |= E2F_FLAG_ABORT; - return rc; - } - } + assert(ctx->lustre_mds_files == 1); + mdsdb = ctx->lustre_mdsdb[0]; - rc = ext2fs_get_mem(sizeof(struct lfsck_outdb_info), - &ctx->lfsck_oinfo); - if (rc) { - ctx->lfsck_oinfo = NULL; + /* remove old db file */ + if (unlink(mdsdb)) { + rc = errno; + if (rc != ENOENT) { + fprintf(stderr,"Error removing old db %s: %s\n", + mdsdb, strerror(rc)); ctx->flags |= E2F_FLAG_ABORT; return rc; } - memset(ctx->lfsck_oinfo, 0, sizeof(struct lfsck_outdb_info)); - rc = ext2fs_get_mem(sizeof(struct lfsck_ofile_ctx)*LOV_MAX_OSTS, - &ctx->lfsck_oinfo->ofile_ctx); - if (rc) { - ext2fs_free_mem(&ctx->lfsck_oinfo); - ctx->flags |= E2F_FLAG_ABORT; + } + + rc = ext2fs_get_mem(sizeof(struct lfsck_outdb_info), + &ctx->lfsck_oinfo); + if (rc) { + ctx->lfsck_oinfo = NULL; + ctx->flags |= E2F_FLAG_ABORT; + return rc; + } + memset(ctx->lfsck_oinfo, 0, 
sizeof(struct lfsck_outdb_info)); + rc = ext2fs_get_mem(sizeof(struct lfsck_ofile_ctx)*LOV_MAX_OSTS, + &ctx->lfsck_oinfo->ofile_ctx); + if (rc) { + ext2fs_free_mem(&ctx->lfsck_oinfo); + ctx->flags |= E2F_FLAG_ABORT; + return rc; + } + + memset(ctx->lfsck_oinfo->ofile_ctx, 0, + sizeof(struct lfsck_ofile_ctx) * LOV_MAX_OSTS); + if (lfsck_opendb(mdsdb, MDS_SIZEINFO, + &ctx->lfsck_oinfo->mds_sizeinfo_dbp, 0, + sizeof(__u64) + sizeof(struct lfsck_mds_szinfo), + numfiles, DB_HASH)) { + fprintf(stderr, "Failed to open db file %s\n", + MDS_SIZEINFO); + ctx->flags |= E2F_FLAG_ABORT; + return (EIO); + } + if (lfsck_opendb(mdsdb, MDS_MDTDB, + &ctx->lfsck_oinfo->mds_dirstripe_dbp, 1, + sizeof(struct lu_fid) + sizeof(struct lfsck_mds_stripe_ent), + numfiles, DB_HASH)) { + fprintf(stderr, "Failed to open db file %s\n", + MDS_MDTDB); + ctx->flags |= E2F_FLAG_ABORT; + return (EIO); + } + + if (ctx->options & E2F_OPT_READONLY) { + e2fsck_get_lov_objids(ctx, ctx->lfsck_oinfo); + lfsck_write_mds_hdrinfo(ctx, ctx->lfsck_oinfo); + } + + return rc; +} + +static int e2fsck_lfsck_save_lmv_ea(e2fsck_t ctx, ext2_ino_t ino, + __u32 generation, struct lmv_user_md *lmv) +{ + struct lmv_user_mds_data *lmi; + struct lu_fid master_fid; + int rc = 0, i; + DBT key, data; + DB *dbp; + + assert(ctx->lustre_mds_files == 1); + if (!ctx->lfsck_oinfo) { + rc = e2fsck_lfsck_init_oinfo(ctx); + if (rc) return rc; + } + lmi = lmv->lum_objects; + e2fsck_fid_le_to_cpu(&master_fid, &lmi[0].lum_fid); + for (i = 0; i < lmv->lum_stripe_count; i++, lmi++) { + int mdt_idx = lmi->lum_mds; + struct lfsck_mds_stripe_ent mds_ent; + struct lu_fid fid; + + e2fsck_fid_le_to_cpu(&fid, &lmi->lum_fid); + if (mdt_idx >= LMV_MAX_MDTS) { + fprintf(stderr, "invalid MDT index %u ino %u[%d]\n", + mdt_idx, ino, i); + continue; } - memset(ctx->lfsck_oinfo->ofile_ctx, 0, - sizeof(struct lfsck_ofile_ctx) * LOV_MAX_OSTS); - if (lfsck_opendb(ctx->lustre_mdsdb, MDS_SIZEINFO, - &ctx->lfsck_oinfo->mds_sizeinfo_dbp, 0, - sizeof(mds_fid) + 
sizeof(szinfo), numfiles)) { - fprintf(stderr, "Failed to open db file %s\n", - MDS_SIZEINFO); + + if (mdt_idx + 1 > ctx->lfsck_oinfo->mdt_count) + ctx->lfsck_oinfo->mdt_count = mdt_idx + 1; + + e2fsck_fid_cpu_to_le(&mds_ent.mds_mfid, &master_fid); + e2fsck_fid_cpu_to_le(&mds_ent.mds_fid, &fid); + mds_ent.mds_mdtidx = ext2fs_cpu_to_le32(mdt_idx); + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + key.data = &fid; + key.size = sizeof(struct lu_fid); + + cputole_mds_stripe_ent(&mds_ent); + data.data = &mds_ent; + data.size = sizeof(mds_ent); + dbp = ctx->lfsck_oinfo->mds_dirstripe_dbp; + if ((rc = dbp->put(dbp, NULL, &key, &data, 0)) != 0) { + dbp->err(dbp, rc, "db->put failed\n"); + e2fsck_lfsck_cleanupdb(ctx); ctx->flags |= E2F_FLAG_ABORT; + /* XXX - Free lctx memory */ return (EIO); } + } + return rc; +} - if (ctx->options & E2F_OPT_READONLY) { - e2fsck_get_lov_objids(ctx, ctx->lfsck_oinfo); - lfsck_write_mds_hdrinfo(ctx, ctx->lfsck_oinfo); - } +static int e2fsck_lfsck_save_ea(e2fsck_t ctx, ext2_ino_t ino, __u32 generation, + struct lov_user_md *lmm) +{ + ext2_filsys fs = ctx->fs; + struct lfsck_mds_szinfo szinfo; + struct lov_user_ost_data_v1 *loi; + __u64 mds_fid; + int rc, i; + DBT key, data; + DB *dbp; + __u32 numfiles = fs->super->s_inodes_count - + fs->super->s_free_inodes_count; + char *mdsdb; + + assert(ctx->lustre_mds_files == 1); + mdsdb = ctx->lustre_mdsdb[0]; + if (!ctx->lfsck_oinfo) { + rc = e2fsck_lfsck_init_oinfo(ctx); + if (rc) + return rc; } if (lmm->lmm_magic == LOV_USER_MAGIC_V3) loi = ((struct lov_user_md_v3 *)lmm)->lmm_objects; @@ -278,7 +393,7 @@ static int e2fsck_lfsck_save_ea(e2fsck_t /* XXX: We don't save the layout type here. This doesn't matter for * now, we don't really need the pool information for lfsck, but * in the future we may need it for RAID-1 and other layouts. 
*/ - szinfo.mds_group = lmm->lmm_object_gr; + szinfo.mds_group = lmm->lmm_object_seq; szinfo.mds_stripe_size = lmm->lmm_stripe_size; szinfo.mds_stripe_start = loi->l_ost_idx; szinfo.mds_stripe_count = lmm->lmm_stripe_count; @@ -330,10 +445,9 @@ static int e2fsck_lfsck_save_ea(e2fsck_t char dbname[256]; memset(dbname, 0, 256); sprintf(dbname, "%s.%d", MDS_OSTDB, ost_idx); - rc = lfsck_opendb(ctx->lustre_mdsdb, dbname, - &ofile_ctx->dbp, 1, + rc = lfsck_opendb(mdsdb, dbname, &ofile_ctx->dbp, 1, sizeof(objid) + sizeof(mds_ent), - numfiles); + numfiles, DB_HASH); if (rc) { e2fsck_lfsck_cleanupdb(ctx); ctx->flags |= E2F_FLAG_ABORT; @@ -384,13 +498,23 @@ int lfsck_check_lov_ea(e2fsck_t ctx, str lmm->lmm_magic); return(-EINVAL); } - - if (lmm->lmm_object_gr != 0 ) { +#if 0 + if (lmm->lmm_object_seq != 0 ) { VERBOSE(ctx, "error: only handle group 0 not "LPU64"\n", - lmm->lmm_object_gr); + lmm->lmm_object_seq); return(-EINVAL); } +#endif + return 0; +} +int lfsck_check_lmv_ea(e2fsck_t ctx, struct lmv_user_md *lmv) +{ + if (lmv->lum_magic != LMV_MAGIC_V1) { + VERBOSE(ctx, "error: wrong magic %08x , not %08x\n", + lmv->lum_magic, LMV_MAGIC_V1); + return(-EINVAL); + } return 0; } @@ -406,22 +530,34 @@ int e2fsck_lfsck_found_ea(e2fsck_t ctx, if ((ctx->lustre_devtype & LUSTRE_TYPE) != LUSTRE_MDS) return 0; - if (!LINUX_S_ISREG(inode->i_mode)) + if (!LINUX_S_ISREG(inode->i_mode) && !LINUX_S_ISDIR(inode->i_mode)) return 0; + + if (LINUX_S_ISREG(inode->i_mode)) { + if (entry->e_name_index == EXT3_XATTR_INDEX_TRUSTED && + !strncmp(entry->e_name,XATTR_LUSTRE_MDS_LOV_EA,entry->e_name_len)){ + struct lov_user_md *lmm = value; + letocpu_lov_user_md(lmm); - if (entry->e_name_index == EXT3_XATTR_INDEX_TRUSTED && - !strncmp(entry->e_name,XATTR_LUSTRE_MDS_LOV_EA,entry->e_name_len)){ - struct lov_user_md *lmm = value; - letocpu_lov_user_md(lmm); + if (lfsck_check_lov_ea(ctx, lmm)) { + ctx->flags |= E2F_FLAG_ABORT; + return -EINVAL; + } - if (lfsck_check_lov_ea(ctx, lmm)) { - ctx->flags |= 
E2F_FLAG_ABORT; - return -EINVAL; + return e2fsck_lfsck_save_ea(ctx, ino, inode->i_generation, lmm); } - - return e2fsck_lfsck_save_ea(ctx, ino, inode->i_generation, lmm); + } else { + if (entry->e_name_index == EXT3_XATTR_INDEX_TRUSTED && + !strncmp(entry->e_name, XATTR_LUSTRE_MDS_LMV_EA, entry->e_name_len)) { + struct lmv_user_md *lmv = value; + letocpu_lmv_user_md(lmv); + if (lfsck_check_lmv_ea(ctx, lmv)) { + ctx->flags |= E2F_FLAG_ABORT; + return -EINVAL; + } + return e2fsck_lfsck_save_lmv_ea(ctx, ino, inode->i_generation, lmv); + } } - return 0; } @@ -452,6 +588,11 @@ int e2fsck_lfsck_flush_ea(e2fsck_t ctx) rc += dbp->close(dbp, 0); ctx->lfsck_oinfo->mds_sizeinfo_dbp = NULL; } + if (ctx->lfsck_oinfo->mds_dirstripe_dbp != NULL) { + dbp = ctx->lfsck_oinfo->mds_dirstripe_dbp; + rc += dbp->close(dbp, 0); + ctx->lfsck_oinfo->mds_dirstripe_dbp = NULL; + } if (rc) ctx->flags |= E2F_FLAG_ABORT; @@ -459,6 +600,65 @@ int e2fsck_lfsck_flush_ea(e2fsck_t ctx) return(rc); } +int e2fsck_lfsck_put_fid(e2fsck_t ctx, struct ext2_dir_entry_2 *de) +{ + char *len = de->name + de->name_len + 1 /* NUL terminator */; + struct lu_fid *fid; + DBT key, data; + DB *dbp; + char *mdsdb; + + assert(ctx->lustre_mds_files == 1); + mdsdb = ctx->lustre_mdsdb[0]; + + assert(ctx->lfsck_oinfo != NULL); + if (ctx->lfsck_oinfo->mds_dirfid_dbp == NULL) { + ext2_filsys fs = ctx->fs; + __u32 numfiles = fs->super->s_inodes_count - + fs->super->s_free_inodes_count; + if (lfsck_opendb(mdsdb, MDS_DFIDDB, + &ctx->lfsck_oinfo->mds_dirfid_dbp, 0, + sizeof(__u32) + sizeof(struct lu_fid), + numfiles, DB_HASH)) { + fprintf(stderr, "Failed to open db file %s\n", + MDS_DFIDDB); + ctx->flags |= E2F_FLAG_ABORT; + return (EIO); + } + } + + fid = (struct lu_fid *)(len + 1); + + e2fsck_fid_be_to_cpu(fid, fid); + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + key.data = &de->inode; + key.size = sizeof(de->inode); + + e2fsck_fid_cpu_to_le(fid, fid); + + data.data = fid; + data.size = sizeof(*fid); + + 
dbp = ctx->lfsck_oinfo->mds_dirfid_dbp; + if (dbp->put(dbp, NULL, &key, &data, 0) != 0) { + fprintf(stderr, "Failure to put data into db\n"); + ctx->flags |= E2F_FLAG_ABORT; + return(DIRENT_ABORT); + } + return 0; +} + +int e2fsck_lfsck_flush_fid(e2fsck_t ctx) +{ + if (ctx->lfsck_oinfo && ctx->lfsck_oinfo->mds_dirfid_dbp != NULL) { + DB* dbp = ctx->lfsck_oinfo->mds_dirfid_dbp; + dbp->close(dbp, 0); + ctx->lfsck_oinfo->mds_dirfid_dbp = NULL; + } + return 0; +} + /* From debugfs.c for file removal */ static int lfsck_release_blocks_proc(ext2_filsys fs, blk_t *blocknr, int blockcnt, void *private) @@ -555,6 +755,7 @@ static int lfsck_list_objs(ext2_ino_t di } objent.ost_objid = objid; + objent.ost_group = lctx->mds_group; objent.ost_flag = 0; if (LINUX_S_ISREG(inode.i_mode)) objent.ost_size = EXT2_I_SIZE(&inode); @@ -665,11 +866,15 @@ static int lfsck_iterate_obj_dirs(ext2_i } /* Get the starting point of where the objects reside */ -static int lfsck_get_object_dir(e2fsck_t ctx, char *block_buf,ext2_ino_t *inode) +static int lfsck_get_object_dir(e2fsck_t ctx, char *block_buf, + ext2_ino_t **inode) { ext2_filsys fs = ctx->fs; ext2_ino_t tinode; + int i = 0; int rc; + int count = ctx->lustre_mds_files; + ext2_ino_t *dir = *inode; rc = ext2fs_lookup(fs, EXT2_ROOT_INO, OBJECT_DIR, strlen(OBJECT_DIR), block_buf, &tinode); @@ -677,46 +882,57 @@ static int lfsck_get_object_dir(e2fsck_t fprintf(stderr, "error looking up OST object parent dir\n"); return (ENOENT); } + rc = ext2fs_check_directory(fs, tinode); - if (rc) { + if (rc) return(ENOENT); - } rc = ext2fs_lookup(fs, tinode, OBJECT_DIR_V1, strlen(OBJECT_DIR_V1), - block_buf, inode); + block_buf, dir); if (rc) { - rc = ext2fs_lookup(fs, tinode, OBJECT_DIR_V2, - strlen(OBJECT_DIR_V2), block_buf, inode); - if (rc) { - fprintf(stderr, "error looking up OST object subdir\n"); - return (-ENOENT); - } - } - rc = ext2fs_check_directory(fs, *inode); - if (rc) { - return(-ENOENT); + for (i = 0; i < count; i++) { + char 
object_name[5]; + sprintf(object_name, "%d", i == 0 ? i : + i + FIRST_MDT_GROUP - 1); + rc = ext2fs_lookup(fs, tinode, object_name, + strlen(object_name), block_buf, dir); + if (rc) { + fprintf(stderr, "error looking up OST object subdir %d \n", i); + break; + } + rc = ext2fs_check_directory(fs, *dir); + if (rc) + return(-ENOENT); + dir ++; + } + } else { + rc = ext2fs_check_directory(fs, *dir); + if (rc) + return(-ENOENT); } return(0); } /* What is the last object id for the OST */ -static int lfsck_get_last_id(e2fsck_t ctx, __u64 *last_id) +static int lfsck_get_last_id(e2fsck_t ctx, __u64 *last_id, int index) { ext2_filsys fs = ctx->fs; - ext2_ino_t inode, tinode; + ext2_ino_t inode[LMV_MAX_MDTS], tinode; ext2_file_t e2_file; char *block_buf; unsigned int got; int rc; + ext2_ino_t *dir = &inode[0]; block_buf = e2fsck_allocate_memory(ctx, fs->blocksize * 3, "lookup buffer"); - rc = lfsck_get_object_dir(ctx, block_buf, &inode); + rc = lfsck_get_object_dir(ctx, block_buf, &dir); if (rc) goto out; - rc = ext2fs_lookup(fs, inode, LAST_ID, + assert(index < ctx->lustre_mds_files); + rc = ext2fs_lookup(fs, inode[index], LAST_ID, strlen(LAST_ID), block_buf, &tinode); if (rc) goto out; @@ -736,32 +952,31 @@ static int lfsck_get_last_id(e2fsck_t ct ext2fs_file_close(e2_file); goto out; } - rc = ext2fs_file_close(e2_file); - *last_id = ext2fs_le64_to_cpu(*last_id); out: ext2fs_free_mem(&block_buf); return (rc); } -int lfsck_set_last_id(e2fsck_t ctx, __u64 last_id) +int lfsck_set_last_id(e2fsck_t ctx, __u64 last_id, int group) { ext2_filsys fs = ctx->fs; - ext2_ino_t inode, tinode; + ext2_ino_t inode[LMV_MAX_MDTS], tinode; ext2_file_t e2_file; char *block_buf; unsigned int written; int rc; + ext2_ino_t *dir = inode; block_buf = e2fsck_allocate_memory(ctx, fs->blocksize * 3, "lookup buffer"); - - rc = lfsck_get_object_dir(ctx, block_buf, &inode); + rc = lfsck_get_object_dir(ctx, block_buf, &dir); if (rc) goto out; - rc = ext2fs_lookup(fs, inode, LAST_ID, + assert(group < 
ctx->lustre_mds_files); + rc = ext2fs_lookup(fs, inode[group], LAST_ID, strlen(LAST_ID), block_buf, &tinode); if (rc) goto out; @@ -787,7 +1002,6 @@ int lfsck_set_last_id(e2fsck_t ctx, __u } rc = ext2fs_file_close(e2_file); - out: ext2fs_free_mem(&block_buf); return (rc); @@ -836,9 +1050,12 @@ int e2fsck_get_last_rcvd_info(e2fsck_t c if (local_uuid) memcpy(local_uuid, &lsd->lsd_uuid, sizeof(lsd->lsd_uuid)); - if (peer_uuid) + if (peer_uuid) { memcpy(peer_uuid, &lsd->lsd_peeruuid,sizeof(lsd->lsd_peeruuid)); + fprintf(stderr, "peeruuid is %s \n", peer_uuid->uuid); + } + if (subdircount) *subdircount = ext2fs_le16_to_cpu(lsd->lsd_subdir_count); @@ -984,9 +1201,12 @@ int lfsck_create_objid(e2fsck_t ctx, __u char name[32]; int len, dirlen; __u32 compat, incompat, subdircount; - ext2_ino_t inode, tinode, cinode; + ext2_ino_t inode[LMV_MAX_MDTS], tinode, cinode; struct ext2_inode ext2inode; char *block_buf; + int count = ctx->lustre_mds_files; + ext2_ino_t *dir = inode; + int i; block_buf = e2fsck_allocate_memory(ctx, ctx->fs->blocksize * 3, "lookup buffer"); @@ -1012,55 +1232,56 @@ int lfsck_create_objid(e2fsck_t ctx, __u goto out; } - if (lfsck_get_object_dir(ctx, block_buf, &inode)) { + if (lfsck_get_object_dir(ctx, block_buf, &dir)) { rc = EINVAL; goto out; } dirlen = sprintf(dirname, "d%u", (int)objid & (subdircount - 1)); - rc = ext2fs_lookup(ctx->fs, inode, dirname, - dirlen, block_buf, &tinode); - if (rc) { - rc = EINVAL; - goto out; - } + for (i = 0; i < count; i++) { + rc = ext2fs_lookup(ctx->fs, inode[i], dirname, + dirlen, block_buf, &tinode); + if (rc) { + rc = EINVAL; + goto out; + } - if (ext2fs_namei(ctx->fs, EXT2_ROOT_INO, tinode, name, &cinode) == 0) { - fprintf(stderr, "Failure to create obj\n"); - rc = EINVAL; - goto out; - } + if (ext2fs_namei(ctx->fs, EXT2_ROOT_INO, tinode, name, &cinode) == 0) { + fprintf(stderr, "Failure to create obj\n"); + rc = EINVAL; + goto out; + } - rc = ext2fs_new_inode(ctx->fs, tinode, 010755, 0, &cinode); - if (rc) { - 
fprintf(stderr, "Failure to create obj\n"); - rc = EINVAL; - goto out; - } + rc = ext2fs_new_inode(ctx->fs, tinode, 010755, 0, &cinode); + if (rc) { + fprintf(stderr, "Failure to create obj\n"); + rc = EINVAL; + goto out; + } - rc = ext2fs_link(ctx->fs, tinode, name, cinode, EXT2_FT_REG_FILE); - if (rc) { - fprintf(stderr, "Failure to create obj\n"); - rc = EINVAL; - goto out; - } + rc = ext2fs_link(ctx->fs, tinode, name, cinode, EXT2_FT_REG_FILE); + if (rc) { + fprintf(stderr, "Failure to create obj\n"); + rc = EINVAL; + goto out; + } - if (ext2fs_test_inode_bitmap(ctx->fs->inode_map, cinode)) { - fprintf(stderr, "Warning: inode already set"); - } - ext2fs_inode_alloc_stats2(ctx->fs, cinode, +1, 0); - memset(&ext2inode, 0, sizeof(ext2inode)); - ext2inode.i_mode = LINUX_S_IFREG; - ext2inode.i_atime = ext2inode.i_ctime = ext2inode.i_mtime = time(NULL); - ext2inode.i_links_count = 1; - ext2inode.i_size = 0; - if (ext2fs_write_inode(ctx->fs, cinode, &ext2inode)) { - fprintf(stderr, "Failure to create obj\n"); - rc = EINVAL; - goto out; + if (ext2fs_test_inode_bitmap(ctx->fs->inode_map, cinode)) { + fprintf(stderr, "Warning: inode already set"); + } + ext2fs_inode_alloc_stats2(ctx->fs, cinode, +1, 0); + memset(&ext2inode, 0, sizeof(ext2inode)); + ext2inode.i_mode = LINUX_S_IFREG; + ext2inode.i_atime = ext2inode.i_ctime = ext2inode.i_mtime = time(NULL); + ext2inode.i_links_count = 1; + ext2inode.i_size = 0; + if (ext2fs_write_inode(ctx->fs, cinode, &ext2inode)) { + fprintf(stderr, "Failure to create obj\n"); + rc = EINVAL; + goto out; + } } - out: ext2fs_free_mem((void *)&(block_buf)); return (rc); @@ -1074,13 +1295,14 @@ void e2fsck_pass6_ost(e2fsck_t ctx) ext2_filsys fs = ctx->fs; struct lfsck_ost_ctx lctx; struct lfsck_ost_hdr ost_hdr; - struct lfsck_mds_hdr mds_hdr; + struct lfsck_mds_hdr mds_hdr[LMV_MAX_MDTS]; struct lfsck_ost_objent objent; DB *outdb = NULL; - DB *mds_hdrdb = NULL; + DB *mds_hdrdb[LMV_MAX_MDTS] = {NULL}; DB *osthdr = NULL; DBT key, data; - 
ext2_ino_t dir; + ext2_ino_t dir[LMV_MAX_MDTS]; + ext2_ino_t *dirp = &dir[0]; __u32 compat, rocompat, incompat; int i, rc; char *block_buf = NULL; @@ -1096,38 +1318,39 @@ void e2fsck_pass6_ost(e2fsck_t ctx) block_buf = e2fsck_allocate_memory(ctx, fs->blocksize * 3, "block iterate buffer"); + for (i = 0; i < ctx->lustre_mds_files; i ++) { + rc = lfsck_opendb(ctx->lustre_mdsdb[i], MDS_HDR, &mds_hdrdb[i], 0, 0, 0, + DB_HASH); + if (rc != 0) { + fprintf(stderr, "failure to open database %s: %s\n", + MDS_HDR, db_strerror(rc)); + ctx->flags |= E2F_FLAG_ABORT; + goto out; + } - rc = lfsck_opendb(ctx->lustre_mdsdb, MDS_HDR, &mds_hdrdb, 0, 0, 0); - if (rc != 0) { - fprintf(stderr, "failure to open database %s: %s\n", - MDS_HDR, db_strerror(rc)); - ctx->flags |= E2F_FLAG_ABORT; - goto out; - } + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + mds_hdr[i].mds_magic = MDS_MAGIC; + key.data = &mds_hdr[i].mds_magic; + key.size = sizeof(mds_hdr[i].mds_magic); + data.data = &mds_hdr[i]; + data.size = sizeof(struct lfsck_mds_hdr); + data.ulen = sizeof(struct lfsck_mds_hdr); + data.flags = DB_DBT_USERMEM; + rc = mds_hdrdb[i]->get(mds_hdrdb[i], NULL, &key, &data, 0); + if (rc) { + fprintf(stderr,"error getting mds_hdr ("LPU64":%u) in %s: %s\n", + mds_hdr[i].mds_magic, (int)sizeof(mds_hdr[i].mds_magic), + ctx->lustre_mdsdb[i], db_strerror(rc)); + ctx->flags |= E2F_FLAG_ABORT; + goto out; + } - memset(&key, 0, sizeof(key)); - memset(&data, 0, sizeof(data)); - mds_hdr.mds_magic = MDS_MAGIC; - key.data = &mds_hdr.mds_magic; - key.size = sizeof(mds_hdr.mds_magic); - data.data = &mds_hdr; - data.size = sizeof(mds_hdr); - data.ulen = sizeof(mds_hdr); - data.flags = DB_DBT_USERMEM; - rc = mds_hdrdb->get(mds_hdrdb, NULL, &key, &data, 0); - if (rc) { - fprintf(stderr,"error getting mds_hdr ("LPU64":%u) in %s: %s\n", - mds_hdr.mds_magic, (int)sizeof(mds_hdr.mds_magic), - ctx->lustre_mdsdb, db_strerror(rc)); - ctx->flags |= E2F_FLAG_ABORT; - goto out; + assert(data.size == 
sizeof(struct lfsck_mds_hdr)); + memcpy(&mds_hdr[i], data.data, sizeof(struct lfsck_mds_hdr)); + letocpu_mds_hdr(&mds_hdr[i]); } - - assert(data.size == sizeof(mds_hdr)); - memcpy(&mds_hdr, data.data, sizeof(mds_hdr)); - letocpu_mds_hdr(&mds_hdr); - - rc = lfsck_opendb(ctx->lustre_ostdb, OST_HDR, &osthdr, 0, 0, 0); + rc = lfsck_opendb(ctx->lustre_ostdb, OST_HDR, &osthdr, 0, 0, 0, DB_HASH); if (rc != 0) { fprintf(stderr, "failure to open database %s: %s\n", OST_HDR, db_strerror(rc)); @@ -1135,17 +1358,6 @@ void e2fsck_pass6_ost(e2fsck_t ctx) goto out; } - rc = lfsck_opendb(ctx->lustre_ostdb, OST_OSTDB, &outdb, 0, - sizeof(objent.ost_objid) + sizeof(objent), - fs->super->s_inodes_count - - fs->super->s_free_inodes_count); - if (rc != 0) { - fprintf(stderr, "error getting ost_hdr in %s: %s\n", - ctx->lustre_ostdb, db_strerror(rc)); - ctx->flags |= E2F_FLAG_ABORT; - goto out; - } - if (e2fsck_get_last_rcvd_info(ctx, &ost_hdr.ost_uuid, &ost_hdr.ost_mds_uuid, NULL, &ost_hdr.ost_index, @@ -1169,7 +1381,7 @@ void e2fsck_pass6_ost(e2fsck_t ctx) * Get /O/R or /O/0 directory * for each entry scan all the dirents and get the object id */ - if (lfsck_get_object_dir(ctx, block_buf, &dir)) { + if (lfsck_get_object_dir(ctx, block_buf, &dirp)) { ctx->flags |= E2F_FLAG_ABORT; goto out; } @@ -1178,56 +1390,79 @@ void e2fsck_pass6_ost(e2fsck_t ctx) * Okay so we have the containing directory so let's iterate over the * containing d* dirs and then iterate again inside */ - lctx.ctx = ctx; - lctx.outdb = outdb; - lctx.status = 0; - lctx.numfiles = 0; - lctx.max_objid = 0; - lctx.status = ext2fs_dir_iterate2(fs, dir, 0, block_buf, - lfsck_iterate_obj_dirs, &lctx); - if (lctx.status) { - fprintf(stderr, "Failure in iterating object dirs\n"); - ctx->flags |= E2F_FLAG_ABORT; - return; - } - - ost_hdr.ost_magic = OST_MAGIC; - ost_hdr.ost_flags = ctx->options & E2F_OPT_READONLY; - ost_hdr.ost_num_files = lctx.numfiles; - VERBOSE(ctx, "OST: num files = %u\n", lctx.numfiles); - - if 
(lfsck_get_last_id(ctx, &ost_hdr.ost_last_id)) { - fprintf(stderr, "Failure to get last id for objects\n"); - ctx->flags |= E2F_FLAG_ABORT; - goto out; - } - VERBOSE(ctx, "OST: last_id = "LPU64"\n", ost_hdr.ost_last_id); + for (i = 0; i < ctx->lustre_mds_files; i++) { + char dbname[256]; + sprintf(dbname, "%s.%d", OST_OSTDB, i); + rc = lfsck_opendb(ctx->lustre_ostdb, dbname, &outdb, 0, + sizeof(objent.ost_objid) + sizeof(objent), + fs->super->s_inodes_count - + fs->super->s_free_inodes_count, DB_HASH); + if (rc != 0) { + fprintf(stderr, "error getting ost_hdr in %s: %s\n", + ctx->lustre_ostdb, db_strerror(rc)); + ctx->flags |= E2F_FLAG_ABORT; + goto out; + } - /* Update the last_id value on the OST if necessary/possible to the - * MDS value if larger. Otherwise we risk creating duplicate objects. - * If running read-only, we skip this so new objects are ignored. */ - ost_hdr.ost_last_id = lctx.max_objid; - if (!(ctx->options & E2F_OPT_READONLY) && - !(mds_hdr.mds_flags & E2F_OPT_READONLY)) { - for (i = 0; i < mds_hdr.mds_num_osts; i++) { - if (strcmp((char *)mds_hdr.mds_ost_info[i].uuid, - (char *)ost_hdr.ost_uuid.uuid) == 0 && - mds_hdr.mds_max_ost_id[i] >= ost_hdr.ost_last_id) - ost_hdr.ost_last_id=mds_hdr.mds_max_ost_id[i]+1; + lctx.ctx = ctx; + lctx.outdb = outdb; + lctx.status = 0; + lctx.numfiles = 0; + lctx.max_objid = 0; + lctx.mds_group = i == 0 ? 
i : i + FIRST_MDT_GROUP - 1; + lctx.status = ext2fs_dir_iterate2(fs, dir[i], 0, block_buf, + lfsck_iterate_obj_dirs, &lctx); + if (lctx.status) { + fprintf(stderr, "Failure in iterating object dirs\n"); + ctx->flags |= E2F_FLAG_ABORT; + return; } + VERBOSE(ctx, "OST group %d : num files = %u\n", i, lctx.numfiles); - if (lfsck_set_last_id(ctx, ost_hdr.ost_last_id)) { - fprintf(stderr, "Failure to set last id\n"); - ctx->flags |= E2F_FLAG_ABORT; - goto out; - } + if (lfsck_get_last_id(ctx, &ost_hdr.ost_last_id[i], i)) { + fprintf(stderr, "Failure to get last id for objects\n"); + ctx->flags |= E2F_FLAG_ABORT; + goto out; + } + VERBOSE(ctx, "OST group %d : last_id = "LPU64"\n", i, ost_hdr.ost_last_id[i]); + /* Update the last_id value on the OST if necessary/possible to the + * MDS value if larger. Otherwise we risk creating duplicate objects. + * If running read-only, we skip this so new objects are ignored. */ + ost_hdr.ost_last_id[i] = lctx.max_objid; + if (!(ctx->options & E2F_OPT_READONLY)){ + int index, k; + for (index = 0; index < ctx->lustre_mds_files; index++) + if (i == mds_hdr[index].mds_index) + break; + assert(index != ctx->lustre_mds_files); + if (!(mds_hdr[index].mds_flags & E2F_OPT_READONLY)) { + for (k = 0; k < mds_hdr[index].mds_num_osts; k++) { + if (strcmp((char *)mds_hdr[index].mds_ost_info[k].uuid, + (char *)ost_hdr.ost_uuid.uuid) == 0 && + mds_hdr[index].mds_max_ost_id[k] >= ost_hdr.ost_last_id[i]) + ost_hdr.ost_last_id[i]=mds_hdr[index].mds_max_ost_id[k]+1; + } + if (lfsck_set_last_id(ctx, ost_hdr.ost_last_id[i], i)) { + fprintf(stderr, "Failure to set last id\n"); + ctx->flags |= E2F_FLAG_ABORT; + goto out; + } #ifdef LOG_REMOVAL - if (lfsck_remove_ost_logs(ctx, block_buf)) - ctx->flags |= E2F_FLAG_ABORT; + if (lfsck_remove_ost_logs(ctx, block_buf)) + ctx->flags |= E2F_FLAG_ABORT; #endif + } + } + ost_hdr.ost_mds_num ++; + outdb->close(outdb, 0); + outdb = NULL; } + ost_hdr.ost_magic = OST_MAGIC; + ost_hdr.ost_flags = ctx->options & 
E2F_OPT_READONLY; + ost_hdr.ost_num_files = lctx.numfiles; + memset(&key, 0, sizeof(key)); memset(&data, 0, sizeof(data)); key.data = &ost_hdr.ost_magic; @@ -1242,8 +1477,9 @@ void e2fsck_pass6_ost(e2fsck_t ctx) } out: - if (mds_hdrdb) - mds_hdrdb->close(mds_hdrdb, 0); + for (i = 0; i < ctx->lustre_mds_files; i ++) + if (mds_hdrdb[i]) + mds_hdrdb[i]->close(mds_hdrdb[i], 0); if (outdb) outdb->close(outdb, 0); if (osthdr) @@ -1287,6 +1523,662 @@ int lfsck_remove_mds_logs(e2fsck_t ctx) return (rc); } +/** + * On-disk format: + * iam mostly tries to reuse existing htree formats. + * + * Format of index node: + * + * +-----+-------+-------+-------+------+-------+------------+ + * | | count | | | | | | + * | gap | / | entry | entry | .... | entry | free space | + * | | limit | | | | | | + * +-----+-------+-------+-------+------+-------+------------+ + * + * gap this part of node is never accessed by iam code. It + * exists for binary compatibility with ldiskfs htree (that, + * in turn, stores fake struct ext2_dirent for ext2 + * compatibility), and to keep some unspecified per-node + * data. Gap can be different for root and non-root index + * nodes. Gap size can be specified for each container + * (gap of 0 is allowed). + * + * count/limit current number of entries in this node, and the maximal + * number of entries that can fit into node. count/limit + * has the same size as entry, and is itself counted in + * count. + * + * entry index entry: consists of a key immediately followed by + * a pointer to a child node. Size of a key and size of a + * pointer depends on container. Entry has neither + * alignment nor padding. + * + * free space portion of node new entries are added to + * + * Entries in index node are sorted by their key value. 
+ **/ +struct iam_entry; +struct iam_lentry; + +struct iam_frame +{ + void *data; + struct iam_entry *entries; + struct iam_entry *at; + __u32 curidx; +}; + +struct iam_root { + __u64 ir_magic; + __u16 ir_keysize; + __u16 ir_recsize; + __u16 ir_ptrsize; + __u8 ir_indirect_levels; + __u8 ir_padding; +}; + +struct iam_countlimit { + __u16 limit; + __u16 count; +}; + + + +static inline struct iam_entry +*e2fs_iam_get_entries(struct iam_frame *frame, int level) +{ + return (level == 0 ? (frame->data + sizeof(struct iam_root)) : + frame->data); +} + +static inline unsigned e2fs_iam_get_count(struct iam_entry *entries) +{ + return ((struct iam_countlimit *) entries)->count; +} + +static inline int e2fs_iam_entry_size(struct iam_root *root) +{ + return (root->ir_keysize + root->ir_ptrsize); +} + +static inline struct iam_entry *e2fs_iam_entry_shift(struct iam_entry *entry, + int shift, + struct iam_root *root) +{ + void *e = entry; + return e + shift * e2fs_iam_entry_size(root); +} + +static inline unsigned long e2fs_iam_entry_diff(struct iam_root *root, + struct iam_entry *e1, + struct iam_entry *e2) +{ + unsigned long diff; + diff = (void *)e1 - (void *)e2; + return diff / e2fs_iam_entry_size(root); +} + +static struct iam_entry * +e2fs_iam_find_position(struct iam_frame *frame, __u64 key, + struct iam_root *root) +{ + unsigned count; + struct iam_entry *p; + struct iam_entry *q; + struct iam_entry *m; + + count = e2fs_iam_get_count(frame->entries); + p = e2fs_iam_entry_shift(frame->entries, 2, root); + q = e2fs_iam_entry_shift(frame->entries, count - 1, root); + while (p <= q) { + m = e2fs_iam_entry_shift(p, e2fs_iam_entry_diff(root, q, p) / 2, root); + if (memcmp(m, &key, root->ir_keysize) > 0) + q = e2fs_iam_entry_shift(m, -1, root); + else + p = e2fs_iam_entry_shift(m, +1, root); + } + return e2fs_iam_entry_shift(p, -1, root); +} + +static inline unsigned e2fs_iam_get_block(struct iam_root *root, + struct iam_entry *entry) +{ + return *(__u32 *) ((char *) entry + 
root->ir_keysize); /* child block number is a full 32-bit field stored right after the key, not a single byte */ +} + +#define IAM_LEAF_HEADER_MAGIC 0x1976 +struct iam_leaf_head { + __u16 ill_magic; + __u16 ill_count; +}; + +static int e2fs_iam_check_leaf(char *leaf) +{ + struct iam_leaf_head *hdr; + + hdr = (struct iam_leaf_head*)leaf; + if (hdr->ill_magic != IAM_LEAF_HEADER_MAGIC) + return -EINVAL; + return 0; +} + +static struct iam_lentry *e2fs_iam_lentries(void *leaf) +{ + return (void *)leaf + sizeof(struct iam_leaf_head); +} + +static inline int e2fs_iam_lentry_size(const struct iam_root *root) +{ + return root->ir_keysize + root->ir_recsize; +} + +static struct iam_lentry +*e2fs_iam_lentry_shift(struct iam_lentry *entry, int shift, + struct iam_root *root) +{ + return (void *)entry + shift * e2fs_iam_lentry_size(root); +} + +static void +*e2fs_iam_lentry_rec(struct iam_root *root, struct iam_lentry *entry) +{ + return (char *)entry + root->ir_keysize; +} + +static inline int +e2fs_iam_lentry_diff(struct iam_root *root, + struct iam_lentry *e1, + struct iam_lentry *e2) +{ + int diff; + int esize; + + esize = e2fs_iam_lentry_size(root); + diff = (void *)e1 - (void *)e2; + return diff / esize; +} + +#define IAM_ROOT_MAGIC 0xbedabb1edULL +static int e2fs_iam_find_leaf(ext2_filsys fs, ext2_ino_t inode, + ext2_file_t file, __u64 key, + struct iam_frame *frames, + struct iam_frame **frame, + struct iam_root *root) +{ + int rc = 0; + unsigned int got; + int i = 0; + __u32 ptr = 0; + struct iam_frame *p; + + p = &frames[0]; + while (1) { + p->at = e2fs_iam_find_position(p, key, root); + ptr = e2fs_iam_get_block(root, p->at); + if (++i > root->ir_indirect_levels) + break; + p++; p->entries = e2fs_iam_get_entries(p, 1); /* only frames[0].entries is set by the caller; without this the next find_position reads through NULL */ + ext2fs_file_llseek(file, ptr * fs->blocksize, EXT2_SEEK_SET, NULL); + rc = ext2fs_file_read(file, p->data, fs->blocksize, &got); + if (rc != 0) + break; + } + *frame = p; + + return rc; +} + +typedef int (*iam_entry_cb_t)(struct iam_root *root, struct iam_lentry *entry, + void *data); + +static int fldb_fill_cb(struct iam_root *root, struct iam_lentry *entry, + void *cb_data) +{ + 
struct lfsck_mds_fldb *range; /* record part of the leaf entry; its fields are byte-swapped in place below */ + int rc = 0; + DB *fldb = (DB *)cb_data; + seqno_t seq; + DBT key, data; + + range = (struct lfsck_mds_fldb *)e2fs_iam_lentry_rec(root, entry); + seq = *(seqno_t*)entry; /* the key occupies the start of the leaf entry */ + + seq = ext2fs_be64_to_cpu(seq); /* key and record fields are converted from big-endian */ + range->lsr_start = ext2fs_be64_to_cpu(range->lsr_start); + range->lsr_end = ext2fs_be64_to_cpu(range->lsr_end); + range->lsr_index = ext2fs_be32_to_cpu(range->lsr_index); + range->lsr_flags = ext2fs_be64_to_cpu(range->lsr_flags); + + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + key.data = &seq; + key.size = sizeof(seq); + data.data = range; + data.size = sizeof(*range); + + if ((rc = fldb->put(fldb, NULL, &key, &data, 0)) != 0) { + fldb->err(fldb, rc, "db->put failed\n"); + return (EIO); /* any put failure is reported to the iterator as EIO */ + } + + return rc; +} + +static int oi_fill_cb(struct iam_root *root, struct iam_lentry *entry, + void *cb_data) +{ /* iam_entry_cb_t callback: stores one leaf entry in the DB passed as cb_data, FID as key and osd_inode_id as value */ + struct osd_inode_id *id; + struct lu_fid *fid; + DB *oidb = (DB *)cb_data; + DBT key, data; + int rc; + + id = (struct osd_inode_id *)e2fs_iam_lentry_rec(root, entry); + fid = (struct lu_fid *)entry; /* the key occupies the start of the leaf entry */ + + id->oii_ino = ext2fs_be32_to_cpu(id->oii_ino); + id->oii_gen = ext2fs_be32_to_cpu(id->oii_gen); + + e2fsck_fid_be_to_cpu(fid, fid); /* converted in place, so the leaf buffer itself is modified */ + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + + key.data = fid; + key.size = sizeof(struct lu_fid); + data.data = id; + data.size = sizeof(struct osd_inode_id); + + if ((rc = oidb->put(oidb, NULL, &key, &data, 0)) != 0) { + oidb->err(oidb, rc, "db->put failed\n"); + return (EIO); /* any put failure is reported to the iterator as EIO */ + } + + return 0; +} + +static int e2fs_iam_fill_db(ext2_filsys fs, struct iam_frame *frame, + struct iam_frame *frames, ext2_file_t file, + struct iam_root *root, DB *fldb, + iam_entry_cb_t callback) +{ /* reads the leaf block that frame->at points to and hands each of its entries to callback */ + struct iam_leaf_head *ilh; + struct iam_lentry *p; + int rc; + unsigned int got; + int count; + int i; + __u32 ptr; + + ptr = e2fs_iam_get_block(root, frame->at); + ext2fs_file_llseek(file, ptr * fs->blocksize, EXT2_SEEK_SET, NULL); + frame ++; /* the leaf is loaded into the frame slot after the last index frame */ + assert(frame == frames + 
root->ir_indirect_levels + 1); + rc = ext2fs_file_read(file, frame->data, fs->blocksize, &got); + if (rc != 0) + return rc > 0 ? -rc : rc; /* the caller treats only negative values as errors; a positive code would be added to its entry count */ + + rc = e2fs_iam_check_leaf(frame->data); + if (rc) + goto fail; + + ilh = (struct iam_leaf_head *)frame->data; + count = ilh->ill_count; + p = e2fs_iam_lentries(frame->data); + for (i = 0; i < count; i++) { + rc = callback(root, p, (void*)fldb); + if (rc) + return rc > 0 ? -rc : rc; /* report the callback failure instead of overwriting it with the entry count */ + p = e2fs_iam_lentry_shift(p, 1, root); + } + rc = count; +fail: + return rc; +} + +/* copy from ext4_htree_next_block */ +static int e2fs_iam_next_block(ext2_filsys fs, + struct iam_root *root, + ext2_file_t file, + struct iam_frame *frame, + struct iam_frame *frames) +{ + struct iam_frame *p; + struct iam_entry *end; + int err = 0, num_frames = 0; + + p = frame; + /* + * Find the next leaf page by incrementing the frame pointer. + * If we run out of entries in the interior node, loop around and + * increment pointer in the parent node. When we break out of + * this loop, num_frames indicates the number of interior + * nodes need to be read. 
+ */ + while (1) { + p->at = e2fs_iam_entry_shift(p->at, 1, root); + end = e2fs_iam_entry_shift(p->entries, e2fs_iam_get_count(p->entries), root); + if (p->at < end) + break; + if (p == frames) + return 0; + num_frames++; + p--; + } + + while (num_frames--) { + __u32 ptr; + unsigned int got; + + ptr = e2fs_iam_get_block(root, p->at); + ext2fs_file_llseek(file, ptr * fs->blocksize, EXT2_SEEK_SET, NULL); + p++; + err = ext2fs_file_read(file, p->data, fs->blocksize, &got); + p->at = p->entries = e2fs_iam_get_entries(p, !(p == frames)); /* pass the frame itself: e2fs_iam_get_entries() dereferences frame->data */ + } + return 1; +} + +#define DX_MAX_TREE_HEIGHT 5 +static int e2fs_iam_iterate(e2fsck_t ctx, ext2_ino_t inode, DB *fldb, + iam_entry_cb_t callback) + +{ + ext2_filsys fs = ctx->fs; + ext2_file_t e2_file; + struct iam_frame frames[DX_MAX_TREE_HEIGHT + 1] = {{0}}; + struct iam_frame *frame; + struct iam_root *root; + int i; + int rc; + unsigned int got; + int count = 0; + __u32 ptr = 0; + + for (i = 0; i < DX_MAX_TREE_HEIGHT + 1; i++) { + frames[i].data = e2fsck_allocate_memory(ctx, fs->blocksize, + "iam iterate buffer"); + if (frames[i].data == NULL) { + rc = ENOMEM; + goto fail; + } + } + + /* Open the IAM file, Load and check the root*/ + rc = ext2fs_file_open(fs, inode, 0, &e2_file); + if (rc) + goto fail; + + rc = ext2fs_file_read(e2_file, frames[0].data, fs->blocksize, &got); + if (rc) + goto close; + + if (got != fs->blocksize) { + rc = EIO; + goto close; + } + root = (struct iam_root *)frames[0].data; + if (root->ir_magic != IAM_ROOT_MAGIC) { + rc = EINVAL; + goto close; + } + ptr = 0; + frames[0].curidx = ptr; + frames[0].entries = e2fs_iam_get_entries(&frames[0], 0); + /* Find the first leaf */ + rc = e2fs_iam_find_leaf(fs, inode, e2_file, 0, frames, &frame, root); + if (rc) + goto close; + + /* load the iam tree node */ + while (1) { + /* read leaf node */ + rc = e2fs_iam_fill_db(fs, frame, frames, e2_file, root, fldb, + callback); + if (rc < 0) + break; + count += rc; + rc = e2fs_iam_next_block(fs, root, e2_file, frame, 
frames); + if (rc == 0) + break; + } +close: + { int close_rc = ext2fs_file_close(e2_file); if (rc == 0) rc = close_rc; } /* keep the first error: a successful close must not hide a failed read or a bad root/leaf */ +fail: + for (i = 0; i < DX_MAX_TREE_HEIGHT + 1; i++) + if (frames[i].data) + ext2fs_free_mem((void *)(&frames[i].data)); + return rc; + +} + +#define FLD "fld" +static int lfsck_get_fldb(e2fsck_t ctx) +{ + ext2_filsys fs = ctx->fs; + ext2_ino_t fld_inode; + int rc = 0; + DB *fldb = NULL; + char *mdsdb; + + assert(ctx->lustre_mds_files == 1); + mdsdb = ctx->lustre_mdsdb[0]; + if (lfsck_opendb(mdsdb, MDS_FLDB, &fldb, 1, + sizeof(seqno_t) + sizeof(struct lfsck_mds_fldb), + 0, DB_BTREE)) { + fprintf(stderr, "failure to open database %s \n", MDS_FLDB); + rc = -EINVAL; + goto out; + } + + rc = ext2fs_lookup(fs, EXT2_ROOT_INO, FLD, strlen(FLD), NULL, + &fld_inode); + if (rc) { + ctx->flags |= E2F_FLAG_ABORT; + rc = -ENOENT; goto out; /* leave through out: so the opened fldb handle is closed */ + } + if (e2fs_iam_iterate(ctx, fld_inode, fldb, fldb_fill_cb)) { + ctx->flags |= E2F_FLAG_ABORT; + rc = -EIO; + } +out: + if (fldb) + fldb->close(fldb, 0); + return rc; +} + +#define OID "oi.16" +static int lfsck_get_oidb(e2fsck_t ctx) +{ + ext2_filsys fs = ctx->fs; + ext2_ino_t oi_inode; + int rc = 0; + DB *oidb = NULL; + char *mdsdb; + __u32 numfiles = fs->super->s_inodes_count - + fs->super->s_free_inodes_count; + + assert(ctx->lustre_mds_files == 1); + mdsdb = ctx->lustre_mdsdb[0]; + + if (lfsck_opendb(mdsdb, MDS_OIDB, &oidb, 1, + sizeof(seqno_t) + sizeof(struct lu_fid), numfiles, + DB_HASH)) { + fprintf(stderr, "failure to open database %s \n", MDS_OIDB); + rc = -EINVAL; + goto out; + } + + rc = ext2fs_lookup(fs, EXT2_ROOT_INO, OID, strlen(OID), NULL, + &oi_inode); + if (rc) { + ctx->flags |= E2F_FLAG_ABORT; + rc = -ENOENT; goto out; /* leave through out: so the opened oidb handle is closed */ + } + if (e2fs_iam_iterate(ctx, oi_inode, oidb, oi_fill_cb)) { + ctx->flags |= E2F_FLAG_ABORT; + rc = -EIO; + } +out: + if (oidb) + oidb->close(oidb, 0); + return rc; +} + +static int lfsck_mdt_save_slave(ext2_ino_t dir, int idx, + struct ext2_dir_entry *dirent, int offset, + int blocksize, char *buf, void *priv_data) +{ + struct 
ext2_dir_entry_2 *dirent2 = (struct ext2_dir_entry_2 *)dirent; + struct lfsck_mds_ctx *lctx = priv_data; + e2fsck_t ctx = lctx->ctx; + ext2_filsys fs = ctx->fs; + struct ext2_super_block *sb = fs->super; + struct ext2_inode_large *inode; + //struct ext2_inode *ext2_inode; + int inode_size = EXT2_INODE_SIZE(sb); + DB *dbp = lctx->outdb; + struct ext2_ext_attr_entry *entry; + char *len = dirent2->name + dirent2->name_len + 1 /* NUL terminator */; + struct lu_fid *fid; + __u32 *eamagic; + char *start, *end; + unsigned int storage_size, remain; + int rc = 0; + int min, max; + + + if (inode_size == EXT2_GOOD_OLD_INODE_SIZE) + /* this isn't large inode. so, nothing to check */ + return 0; + + if ((((dirent2->name_len & 0xFF) == 1) && (dirent2->name[0] == '.')) || + (((dirent2->name_len & 0xFF) == 2) && (dirent2->name[0] == '.') && + (dirent2->name[1] == '.'))) + return 0; + + fid = (struct lu_fid *)(len + 1); + + e2fsck_fid_be_to_cpu(fid, fid); + + inode = (struct ext2_inode_large *) + e2fsck_allocate_memory(lctx->ctx, inode_size, "scratch slave"); + if (ext2fs_read_inode_full(fs, dirent->inode, (struct ext2_inode *)inode, + inode_size)) { + fprintf(stderr, "read inode failed for %s "DFID"\n", + dirent2->name, PFID(fid)); + goto out; + } + + //inode = (struct ext2_inode_large *)ext2_inode; + /* i_extra_isize must cover i_extra_isize + i_pad1 at least */ + min = sizeof(inode->i_extra_isize) + sizeof(inode->i_pad1); + max = inode_size - EXT2_GOOD_OLD_INODE_SIZE; + + /* + * For now we will allow i_extra_isize to be 0, but really + * implementations should never allow i_extra_isize to be 0 + */ + if (inode->i_extra_isize && + (inode->i_extra_isize < min || inode->i_extra_isize > max)) + goto out; + + eamagic = IHDR(inode); + if (*eamagic != EXT2_EXT_ATTR_MAGIC) { + fprintf(stderr, "no EA for %s "DFID" %u eamagic %x %d \n", dirent2->name, + PFID(fid), dirent2->inode, *eamagic, inode->i_extra_isize); + goto out; + } + storage_size = inode_size - EXT2_GOOD_OLD_INODE_SIZE - + 
inode->i_extra_isize; + start = ((char *) inode) + EXT2_GOOD_OLD_INODE_SIZE + + inode->i_extra_isize + sizeof(__u32); + end = (char *) inode + EXT2_INODE_SIZE(ctx->fs->super); + entry = (struct ext2_ext_attr_entry *) start; + remain = storage_size - sizeof(__u32); + while (!EXT2_EXT_IS_LAST_ENTRY(entry)) { + remain -= sizeof(struct ext2_ext_attr_entry); + + if (entry->e_name_index == EXT3_XATTR_INDEX_TRUSTED && + !strncmp(entry->e_name, XATTR_LUSTRE_MDS_LMV_EA, entry->e_name_len)) { + struct lmv_user_md *lmv = (struct lmv_user_md *)(start + + entry->e_value_offs); + struct lfsck_mds_stripe_ent stripe_ent; + DBT key, data; + + letocpu_lmv_user_md(lmv); + if (lfsck_check_lmv_ea(ctx, lmv)) { + fprintf(stderr, "lmv check failed for %s "DFID"\n", + dirent2->name, PFID(fid)); + entry = EXT2_EXT_ATTR_NEXT(entry); continue; /* step past the bad EA; a bare continue would re-test the same entry forever */ + } + + e2fsck_fid_cpu_to_le(&stripe_ent.mds_fid, fid); + stripe_ent.mds_mdtidx = + ext2fs_cpu_to_le32(lmv->lum_objects[0].lum_mds); + e2fsck_fid_cpu_to_le(&stripe_ent.mds_mfid, + &lmv->lum_objects[0].lum_fid); + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + key.data = fid; /* key on the FID bytes themselves, not on the address of the local pointer */ + key.size = sizeof(*fid); + data.data = &stripe_ent; + data.size = sizeof(stripe_ent); + if ((rc = dbp->put(dbp, NULL, &key, &data, 0)) != 0) + dbp->err(dbp, rc, "db->put failed\n"); + } + /* If EA value is stored in external inode then it does not + * consume space here */ + if (entry->e_value_inum == 0) + remain -= entry->e_value_size; + + entry = EXT2_EXT_ATTR_NEXT(entry); + } +out: + ext2fs_free_mem(&inode); + return 0; +} + +#define SLAVE_DIR "OBJ" +static int lfsck_get_slaves(e2fsck_t ctx) +{ + ext2_filsys fs = ctx->fs; + ext2_ino_t dir; + int rc = 0; + DB *objdb = NULL; + char *mdsdb; + struct lfsck_mds_ctx lctx; + __u32 numfiles = fs->super->s_inodes_count - + fs->super->s_free_inodes_count; + + assert(ctx->lustre_mds_files == 1); + mdsdb = ctx->lustre_mdsdb[0]; + + if (lfsck_opendb(mdsdb, MDS_OBJDB, &objdb, 1, + sizeof(struct lfsck_mds_stripe_ent) + + sizeof(struct lu_fid), numfiles, + 
DB_HASH)) { + fprintf(stderr, "failure to open database %s \n", MDS_OBJDB); + rc = -EINVAL; + goto out; + } + + rc = ext2fs_lookup(fs, EXT2_ROOT_INO, SLAVE_DIR, strlen(SLAVE_DIR), + NULL, &dir); + if (rc) { + rc = -ENOENT; goto out; } /* leave through out: so the opened objdb handle is closed */ + + lctx.outdb = objdb; + lctx.ctx = ctx; + rc = ext2fs_dir_iterate2(fs, dir , 0, NULL, lfsck_mdt_save_slave, &lctx); + if (rc) { + ctx->flags |= E2F_FLAG_ABORT; + rc = -EIO; + } +out: + if (objdb) + objdb->close(objdb, 0); + return rc; +} /* * On the mds save the fid and directory information for each file. @@ -1301,19 +2193,22 @@ void e2fsck_pass6_mds(e2fsck_t ctx) struct lfsck_mds_hdr mds_hdr; DBT key, data; DB *outdb = NULL, *dbhdr = NULL; - __u32 compat, rocompat, incompat, index; + __u32 compat, rocompat, incompat; int rc, i; + char *mdsdb; clear_problem_context(&pctx); lctx.ctx = ctx; - + + assert(ctx->lustre_mds_files == 1); + mdsdb = ctx->lustre_mdsdb[0]; /* Found no files with EA on filesystem - empty */ if (ctx->lfsck_oinfo == NULL) { - if (unlink(ctx->lustre_mdsdb)) { + if (unlink(mdsdb)) { if (errno != ENOENT) { fprintf(stderr, "Failure to remove old " - "db file %s\n", ctx->lustre_mdsdb); + "db file %s\n", mdsdb); ctx->flags |= E2F_FLAG_ABORT; goto out; } @@ -1338,19 +2233,38 @@ void e2fsck_pass6_mds(e2fsck_t ctx) sizeof(struct lfsck_ofile_ctx) * LOV_MAX_OSTS); } - if (!(ctx->options & E2F_OPT_READONLY)) { + if (!(ctx->options & E2F_OPT_READONLY)) lfsck_write_mds_hdrinfo(ctx, ctx->lfsck_oinfo); + + rc = lfsck_get_fldb(ctx); + if (rc && rc != -ENOENT) { /* lfsck_get_fldb() reports a missing fld file as -ENOENT, like the two checks below */ + fprintf(stderr, "failure to get fldb \n"); + ctx->flags |= E2F_FLAG_ABORT; + goto out; } - if (lfsck_opendb(ctx->lustre_mdsdb, MDS_DIRINFO, &outdb, 1, + rc = lfsck_get_oidb(ctx); + if (rc && rc != -ENOENT) { + fprintf(stderr, "failure to get oi.16 \n"); + ctx->flags |= E2F_FLAG_ABORT; + goto out; + } + + rc = lfsck_get_slaves(ctx); + if (rc && rc != -ENOENT) { + fprintf(stderr, "failure to get slaves \n"); + ctx->flags |= E2F_FLAG_ABORT; + goto out; + } + + if (lfsck_opendb(mdsdb, 
MDS_DIRINFO, &outdb, 1, sizeof(mds_dirent.mds_fid) + sizeof(mds_dirent), fs->super->s_inodes_count - - fs->super->s_free_inodes_count)) { + fs->super->s_free_inodes_count, DB_HASH)) { fprintf(stderr, "failure to open database %s\n", MDS_DIRINFO); ctx->flags |= E2F_FLAG_ABORT; goto out; } - lctx.outdb = outdb; lctx.numfiles = 0; lctx.dot = EXT2_ROOT_INO; @@ -1381,14 +2295,15 @@ void e2fsck_pass6_mds(e2fsck_t ctx) } if (e2fsck_get_last_rcvd_info(ctx, &mds_hdr.mds_uuid, NULL, NULL, - &index, &compat, &rocompat, &incompat)) { + &mds_hdr.mds_index, &compat, &rocompat, &incompat)) { fprintf(stderr, "Failure to read MDS last_rcvd file\n"); ctx->flags |= E2F_FLAG_ABORT; goto out; } VERBOSE(ctx, "MDS: '%s' mdt idx %u: compat %#x rocomp %#x incomp %#x\n", - (char *)&mds_hdr.mds_uuid.uuid, index,compat,rocompat,incompat); + (char *)&mds_hdr.mds_uuid.uuid, mds_hdr.mds_index,compat,rocompat, + incompat); if (compat & OBD_COMPAT_OST || incompat & OBD_INCOMPAT_OST) { fprintf(stderr, "Found OST last_rcvd file doing MDS check\n"); @@ -1409,7 +2324,7 @@ void e2fsck_pass6_mds(e2fsck_t ctx) #endif } - rc = lfsck_opendb(ctx->lustre_mdsdb, MDS_HDR, &dbhdr, 0, 0, 0); + rc = lfsck_opendb(mdsdb, MDS_HDR, &dbhdr, 0, 0, 0, DB_HASH); if (rc != 0) { fprintf(stderr, "failure to open database %s: %s\n", MDS_HDR, db_strerror(rc)); diff -up old/e2fsprogs-1.41.12.2.ora1/e2fsck/unix.c ./e2fsprogs-1.41.12.2.ora1/e2fsck/unix.c --- old/e2fsprogs-1.41.12.2.ora1/e2fsck/unix.c 2010-10-08 03:48:33.000000000 -0700 +++ ./e2fsprogs-1.41.12.2.ora1/e2fsck/unix.c 2011-01-09 22:57:21.074566687 -0800 @@ -396,8 +396,11 @@ static void check_if_skip(e2fsck_t ctx) fputc('\n', stdout); ext2fs_close(fs); ctx->fs = NULL; - if (ctx->lustre_mdsdb) - free(ctx->lustre_mdsdb); + if (ctx->lustre_mdsdb) { + int i; + for (i = 0; i < ctx->lustre_mds_files; i++) + free(ctx->lustre_mdsdb[i]); + } if (ctx->lustre_ostdb) free(ctx->lustre_ostdb); if (ctx->lfsck_oinfo) @@ -832,35 +835,37 @@ static errcode_t PRS(int argc, char *arg 
long_options, &option_index)) != EOF) switch (c) { case 1: { - char *dbpath, *tmp; + char *dbpath, *p, *mdt_path; + char tmp[PATH_MAX]; if (!optarg) usage(ctx); + p = optarg; + fprintf(stdout, "MDSDB[%u]: %s\n", ctx->lustre_mds_files, optarg); + do { + dbpath = malloc(PATH_MAX); + if (dbpath == NULL) { + fprintf(stderr, "Out of memory\n"); + exit(1); + } + mdt_path = strsep(&p, ","); + strcpy(tmp, optarg); + if (realpath(my_dirname(tmp), dbpath) == NULL) { + int i; + fprintf(stderr, "Failure to resolve path %s\n", + optarg); + for (i = 0; i < ctx->lustre_mds_files; i++) + free(ctx->lustre_mdsdb[i]); + exit(1); + } + + strcpy(tmp, mdt_path); + sprintf(dbpath+strlen(dbpath), "/%s", my_basename(tmp)); + ctx->lustre_mdsdb[ctx->lustre_mds_files] = dbpath; + ctx->lustre_mds_files ++; + ctx->lustre_devtype |= LUSTRE_MDS; - dbpath = malloc(PATH_MAX); - if (dbpath == NULL) { - fprintf(stderr, "Out of memory\n"); - exit(1); - } - tmp = malloc(PATH_MAX); - if (tmp == NULL) { - fprintf(stderr, "Out of memory\n"); - exit(1); - } - - strcpy(tmp, optarg); - if (realpath(my_dirname(tmp), dbpath) == NULL) { - fprintf(stderr, "Failure to resolve path %s\n", - optarg); - exit(1); - } - - strcpy(tmp, optarg); - sprintf(dbpath+strlen(dbpath), "/%s", my_basename(tmp)); - ctx->lustre_mdsdb = dbpath; - ctx->lustre_devtype |= LUSTRE_MDS; - - free(tmp); + } while (p != NULL); break; } case 2: { @@ -1823,8 +1828,11 @@ no_journal: free(ctx->journal_name); if (ctx->lfsck_oinfo) e2fsck_lfsck_cleanupdb(ctx); - if (ctx->lustre_mdsdb) - free(ctx->lustre_mdsdb); + if (ctx->lustre_mdsdb) { + int i; + for (i = 0; i < ctx->lustre_mds_files; i++) + free(ctx->lustre_mdsdb[i]); + } if (ctx->lustre_ostdb) free(ctx->lustre_ostdb);