diff -up old/e2fsprogs-1.41.12.2.ora1/e2fsck/e2fsck.h ./e2fsprogs-1.41.12.2.ora1/e2fsck/e2fsck.h
--- old/e2fsprogs-1.41.12.2.ora1/e2fsck/e2fsck.h	2010-10-08 03:48:33.000000000 -0700
+++ ./e2fsprogs-1.41.12.2.ora1/e2fsck/e2fsck.h	2011-01-09 21:47:00.450433703 -0800
@@ -337,8 +337,10 @@ struct e2fsck_struct {
 	char	*journal_name;
 
 	/* lustre support */
+#define LMV_MAX_MDTS 8
 	int                      lustre_devtype;
-	char                    *lustre_mdsdb;
+	int			 lustre_mds_files;
+	char                    *lustre_mdsdb[LMV_MAX_MDTS];
 	char                    *lustre_ostdb;
 	struct lfsck_outdb_info *lfsck_oinfo;
 
diff -up old/e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck.c ./e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck.c
--- old/e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck.c	2010-10-08 03:48:33.000000000 -0700
+++ ./e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck.c	2011-01-16 10:12:44.660964172 -0800
@@ -83,10 +83,19 @@ struct lfsck_fids {
 	__u64 *fids;
 };
 
+struct lfsck_mdt_check_info {	/* per-MDT state; lfsck_run_checks() keeps one per mdt_files[] entry */
+	struct lfsck_mds_hdr *mdt_hdr;	/* header filled in by lfsck_get_mdt_hdr() */
+	DB *mdt_fldb;	/* MDS_FLDB table (opened as DB_BTREE) */
+	DB *mdt_dfiddb;	/* MDS_DFIDDB table */
+	DB *mdt_oidb;	/* MDS_OIDB table */
+       DB *mdt_hdrdb;	/* MDS_HDR table */
+       DB *mdt_direntdb;	/* MDS_DIRINFO table */
+       DB *mdt_sizeinfodb;	/* MDS_SIZEINFO table */
+};
+
 struct lfsck_thread_info {
 	struct lfsck_mds_hdr *mds_hdr;
-	DB *mds_direntdb;
-	DB *mds_sizeinfodb;
+	struct lfsck_mdt_check_info *mds_info;
 	__u32 start_ost_idx;
 	__u32 end_ost_idx;
 	int status;
@@ -132,13 +141,15 @@ char mnt_path[PATH_MAX];
 char *mds_file;
 char lostandfounddir[PATH_MAX];
 char dupedir[PATH_MAX];
-char *ost_files[LOV_MAX_OSTS];
+char *ost_files[LOV_MAX_OSTS] = { NULL };
+char *mdt_files[LMV_MAX_MDTS] = { NULL };
 int num_ost_files;
+int num_mdt_files;
 
 struct obd_uuid lfsck_uuid[LOV_MAX_OSTS];
 int lov_tgt_count = LOV_MAX_OSTS;
 
-struct lfsck_saved_duplicates *lfsck_duplicates;
+struct lfsck_saved_duplicates *lfsck_duplicates = NULL;
 int lfsck_dup_saved;
 int num_renamed;
 int fixed;
@@ -316,24 +327,35 @@ int parse_args(int argc, char *argv[])
 			lfsck_save++;
 			break;
 		case 'm':
-			VERBOSE(1, "MDSDB: %s\n", optarg);
-			dbpath = malloc(PATH_MAX);
-			if (dbpath == NULL) {
-				fprintf(stderr, "error allocating dbpath\n");
-				return -ENOMEM;
-			}
-			strcpy(tmp, optarg);
-			if (realpath(my_dirname(tmp), dbpath) == NULL) {
-				fprintf(stderr, "Failure to resolve path %s\n",
-					optarg);
-				free(dbpath);
-				exit(1);
-			}
-
-			strcpy(tmp, optarg);
-			sprintf(dbpath+strlen(dbpath), "/%s", my_basename(tmp));
-			mds_file = dbpath;
+		{
+			char *mdt_path;
+			VERBOSE(1, "MDSDB[%u]: %s\n", num_mdt_files, optarg);
+			p1 = optarg;
+			do {
+				if (num_mdt_files >= LMV_MAX_MDTS) { /* mdt_files[] is full */
+					fprintf(stderr, "too many MDS databases (max %d)\n", LMV_MAX_MDTS);
+					return -EINVAL;
+				}
+				dbpath = malloc(PATH_MAX);
+				if (dbpath == NULL) {
+					fprintf(stderr, "error allocating dbpath\n");
+					return -ENOMEM;
+				}
+				mdt_path = strsep(&p1, ","); /* old-style arguments are comma separated */
+				strcpy(tmp, mdt_path); /* this entry's path, not the truncated optarg */
+				if (realpath(my_dirname(tmp), dbpath) == NULL) {
+					fprintf(stderr, "Failure to resolve path %s\n", mdt_path);
+					for (c = 0; c < num_mdt_files; c++)
+						free(mdt_files[c]);
+					free(dbpath);
+					exit(1);
+				}
+				strcpy(tmp, mdt_path);
+				sprintf(dbpath+strlen(dbpath), "/%s", my_basename(tmp));
+				mdt_files[num_mdt_files++] = dbpath;
+			} while (p1 != NULL);
 			break;
+		}
 		case 'n':
 			lfsck_create = 0;
 			lfsck_delete = 0;
@@ -378,7 +400,6 @@ int parse_args(int argc, char *argv[])
 				ost_files[num_ost_files] = dbpath;
 				num_ost_files++;
 			} while (p1 != NULL);
-
 			break;
 		}
 		case 't':
@@ -442,7 +463,7 @@ int parse_args(int argc, char *argv[])
 		}
 	}
 
-	if (mds_file == NULL || ost_files[0] == NULL) {
+	if (mdt_files[0] == NULL || ost_files[0] == NULL) {
 		fprintf(stderr, "--mdsdb or --ostdb unspecified\n");
 		return(-EINVAL);
 	}
@@ -761,7 +782,7 @@ int lfsck_recreate_obj(__u64 mds_fid, __
  */
 int lfsck_calc_size(struct lfsck_mds_objent *mds_obj,
 		    struct lfsck_ost_objent *ost_obj,
-		    DB *mds_sizeinfodb)
+		    DB *mdt_sizeinfodb)
 {
 	struct lfsck_mds_szinfo mds_szinfo1;
 	__u64 calc_size;
@@ -780,7 +801,7 @@ int lfsck_calc_size(struct lfsck_mds_obj
 	data.data = &mds_szinfo1;
 	data.size = data.ulen = sizeof(mds_szinfo1);
 	data.flags = DB_DBT_USERMEM;
-	if ((rc = mds_sizeinfodb->get(mds_sizeinfodb, NULL, &key, &data, 0))) {
+	if ((rc = mdt_sizeinfodb->get(mdt_sizeinfodb, NULL, &key, &data, 0))) {
 		log_write("Failure to get sizeinfo "LPU64"\n",mds_obj->mds_fid);
 		pthread_mutex_unlock(&size_lock);
 		return (-ENOENT);
@@ -818,7 +839,7 @@ int lfsck_calc_size(struct lfsck_mds_obj
 		data.size = sizeof(mds_szinfo1);
 		cputole_mds_szinfo(&mds_szinfo1);
 		/* Make sure we overwrite */
-		if ((rc = mds_sizeinfodb->put(mds_sizeinfodb,
+		if ((rc = mdt_sizeinfodb->put(mdt_sizeinfodb,
 					      NULL, &key, &data, 0)) != 0 ) {
 			log_write("Failure to update sizeinfo data\n");
 			pthread_mutex_unlock(&size_lock);
@@ -836,7 +857,7 @@ int lfsck_calc_size(struct lfsck_mds_obj
  */
 int lfsck_run_pass2(__u32 ost_idx, struct lfsck_mds_hdr *mds_hdr,
 		    DB *mds_ostdb, DB *ostdb,
-		    DB *mds_direntdb, DB *mds_sizeinfodb)
+		    DB *mds_direntdb, DB *mdt_sizeinfodb)
 {
 	struct lfsck_mds_objent mds_obj1;
 	struct lfsck_ost_objent ost_obj1;
@@ -914,7 +935,7 @@ int lfsck_run_pass2(__u32 ost_idx, struc
 			}
 		}
 #ifdef CHECK_SIZE
-		if (lfsck_calc_size(&mds_obj1, &ost_obj1, mds_sizeinfodb)) {
+		if (lfsck_calc_size(&mds_obj1, &ost_obj1, mdt_sizeinfodb)) {
 			log_write("[%u]: error updating file size for object "
 				  LPU64": %s\n", ost_idx,objid,db_strerror(rc));
 			rc = -EINVAL;
@@ -983,7 +1004,7 @@ int lfsck_fix_orphan(__u32 ost_idx, __u6
 	lum->lmm_stripe_offset = 0;
 	lum->lmm_stripe_count = 1;
 	lum->lmm_objects[0].l_object_id = ost_objid;
-	lum->lmm_objects[0].l_object_gr = ost_group;
+	lum->lmm_objects[0].l_object_seq = ost_group;
 	lum->lmm_objects[0].l_ost_gen = 0;
 	lum->lmm_objects[0].l_ost_idx = ost_idx;
 
@@ -1163,7 +1184,7 @@ int lfsck_list_affected_files(char *mds_
 	}
 
 	sprintf(dbname, "%s.%d", MDS_OSTDB, ost_idx);
-	if ((rc = lfsck_opendb(mds_file, dbname, &mds_db, 1, 0, 0)) != 0) {
+	if ((rc = lfsck_opendb(mds_file, dbname, &mds_db, 1, 0, 0, DB_HASH)) != 0) {
 		log_write("failed to open mds db file %s\n", mds_file);
 		rc = -EINVAL;
 		goto out;
@@ -1209,135 +1230,198 @@ out:
 	return(rc);
 }
 
+/* Look up the MDS_MAGIC-keyed header record in @mdsdb, store it in the caller's
+ * @mdt_hdr and convert it with letocpu_mds_hdr(); returns the DB->get() status. */
+static int lfsck_get_mdt_hdr(DB *mdsdb, struct lfsck_mds_hdr *mdt_hdr)
+{
+	DBT hdr_key, hdr_rec;
+	int err;
+
+	assert(mdt_hdr != NULL);
+
+	/* the magic written into the output buffer doubles as the lookup key */
+	mdt_hdr->mds_magic = MDS_MAGIC;
+	memset(&hdr_key, 0, sizeof(hdr_key));
+	hdr_key.data = &mdt_hdr->mds_magic;
+	hdr_key.size = sizeof(mdt_hdr->mds_magic);
+
+	memset(&hdr_rec, 0, sizeof(hdr_rec));
+	hdr_rec.data = mdt_hdr;
+	hdr_rec.size = hdr_rec.ulen = sizeof(*mdt_hdr);
+	hdr_rec.flags = DB_DBT_USERMEM;
+
+	err = mdsdb->get(mdsdb, NULL, &hdr_key, &hdr_rec, 0);
+	if (err == 0)
+		letocpu_mds_hdr(mdt_hdr);
+	else
+		log_write("%s: error getting mdt_hdr info: %s\n", progname, db_strerror(err));
+	return err;
+}
+
 /*
  * For each ost index run checks 1 2 and 3.
  * 1) Check for object referenced by more than one file
  * 2) Check that objects exist on ost
  * 3) Check that containg mds entry exists for an object
  */
-int run_test(__u32 ost_idx, struct lfsck_mds_hdr *mds_hdr,
-	     DB *mds_direntdb, DB *mds_sizeinfodb )
+int run_test(__u32 ost_idx, struct lfsck_mdt_check_info *mdt_info)
 {
+        struct lfsck_mds_hdr *mdt_hdr = NULL;
 	struct lfsck_ost_hdr *ost_hdr = NULL;
-	char dbname[256];
+	char dbname[256], ost_dbname[256];
 	DB *mds_ostdb = NULL;
 	DB *ost_db = NULL;
+        DB *mdt_hdrdb = NULL;
 	DBT key, data;
 	__u64 last_id;
-	int i, rc;
+	int i, j, rc;
+
+        mdt_hdr = malloc(sizeof(*mdt_hdr));
+        if (mdt_hdr == NULL) {
+                log_write("Failure to alloc memory \n");
+                rc = -ENOMEM;
+                goto out;
+        }
+
+        ost_hdr = malloc(sizeof(*ost_hdr));
+        if (ost_hdr == NULL) {
+                log_write("Failure to alloc memory\n");
+                rc = -ENOMEM;
+                goto out;
+        }
 
 	sprintf(dbname, "%s.%d", MDS_OSTDB, ost_idx);
 
 	VERBOSE(2, "testing ost_idx %d\n", ost_idx);
 
-	rc = lfsck_opendb(mds_file, dbname, &mds_ostdb, 1, 0, 0);
-	if (rc != 0) {
-		log_write("failed to open mds db file %s: %s\n",
-			  mds_file, db_strerror(rc));
-		goto out;
-	}
-
-	ost_hdr = malloc(sizeof(*ost_hdr));
-	if (ost_hdr == NULL) {
-		log_write("Failure to alloc memory\n");
-		rc = -ENOMEM;
-		goto out;
-	}
-
-
-	VERBOSE(2, "looking for index %u UUID %s\n", ost_idx,
-		lfsck_uuid[ost_idx].uuid);
-
-	for (i = 0; i < num_ost_files; i++) {
-		VERBOSE(2, "checking file %s\n", ost_files[i]);
-		rc = lfsck_opendb(ost_files[i], OST_HDR, &ost_db, 0, 0, 0);
-		if (rc != 0) {
-			log_write("Error opening ost_data_file %s: rc %d\n",
-				ost_files[i], rc);
-			goto out;
-		}
-		memset(&key, 0, sizeof(key));
-		memset(&data, 0, sizeof(data));
-		ost_hdr->ost_magic = OST_MAGIC;
-		key.data = &ost_hdr->ost_magic;
-		key.size = sizeof(ost_hdr->ost_magic);
-		data.size = data.ulen = sizeof(*ost_hdr);
-		data.data = ost_hdr;
-		data.flags = DB_DBT_USERMEM;
-
-		rc = ost_db->get(ost_db, NULL, &key, &data, 0);
-		ost_db->close(ost_db, 0);
-		ost_db = NULL;
-		if (rc != 0) {
-			log_write("Invalid ost magic on file %s: rc %s\n",
-				  ost_files[i], db_strerror(rc));
-			continue;
-		}
-
-		letocpu_ost_hdr(ost_hdr);
-		VERBOSE(2, "%s has ost UUID %s\n", ost_files[i],
-			ost_hdr->ost_uuid.uuid);
-
-		if (obd_uuid_equals(&lfsck_uuid[ost_idx], &ost_hdr->ost_uuid)) {
-			if (ost_hdr->ost_index != ost_idx) {
-				log_write("Requested ost_idx %u doesn't match "
-					  "index %u found in %s\n", ost_idx,
-					  ost_hdr->ost_index, ost_files[i]);
-				continue;
-			}
-
-			break;
-		}
-	}
-
-	if (i == num_ost_files) {
-		log_write("lfsck: can't find file for ost_idx %d\n", ost_idx);
-		rc = lfsck_list_affected_files(mds_file, mds_hdr,
-					       mds_direntdb, ost_idx);
-		goto out;
-	}
-	rc = lfsck_opendb(ost_files[i], OST_OSTDB, &ost_db, 0, 0, 0);
-	if (rc != 0) {
-		log_write("error opening ost_data_file %s: rc %d\n",
-			ost_files[i], rc);
-		goto out;
-	}
-
-	VERBOSE(1, "MDS: max_id "LPU64" OST: max_id "LPU64"\n",
-		mds_hdr->mds_max_ost_id[ost_idx], ost_hdr->ost_last_id);
-
-	rc = lfsck_run_pass1(ost_idx, mds_ostdb, ost_db, mds_direntdb);
-	if (rc != 0) {
-		log_write("error in running pass1\n");
-		goto out;
-	}
-
-	rc = lfsck_run_pass2(ost_idx, mds_hdr, mds_ostdb, ost_db, mds_direntdb,
-			     mds_sizeinfodb);
-	if (rc != 0) {
-		log_write("error in running pass2\n");
-		goto out;
-	}
-
-	last_id = (ost_hdr->ost_flags & E2F_OPT_READONLY ||
-		   mds_hdr->mds_flags & E2F_OPT_READONLY) ?
-			mds_hdr->mds_max_ost_id[ost_idx] : ost_hdr->ost_last_id;
-
-	rc = lfsck_run_pass3(ost_idx, mds_ostdb, ost_db, ost_hdr->ost_uuid,
-			     last_id);
-	if (rc != 0) {
-		log_write("error in running pass3\n");
-		goto out;
-	}
-	rc = 0;
-
+        for (j = 0; j < num_mdt_files; j++) {
+                rc = lfsck_opendb(mdt_files[j], dbname, &mds_ostdb, 1, 0, 0, DB_HASH);
+                if (rc != 0) {
+                        log_write("failed to open mds db file %s: %s\n",
+                                  mdt_files[j], db_strerror(rc));
+                        goto out;
+                }
+
+                rc = lfsck_opendb(mdt_files[j], MDS_HDR, &mdt_hdrdb, 0, 0, 0, DB_HASH);
+                if (rc != 0) {
+                        log_write("failed to open mds db file %s: %s\n",
+                                  mdt_files[j], db_strerror(rc));
+                        goto out;
+                }
+                
+                rc = lfsck_get_mdt_hdr(mdt_hdrdb, mdt_hdr);
+                if (rc) {
+                        log_write("Failure to get mdt_hdr %s \n", mdt_files[j]);
+                        goto out;
+                }
+                mdt_hdrdb->close(mdt_hdrdb, 0);
+                mdt_hdrdb = NULL;
+
+                VERBOSE(2, "looking for index %u UUID %s\n", ost_idx,
+                        lfsck_uuid[ost_idx].uuid);
+
+                for (i = 0; i < num_ost_files; i++) {
+                        VERBOSE(2, "checking file %s\n", ost_files[i]);
+                        rc = lfsck_opendb(ost_files[i], OST_HDR, &ost_db, 0, 0, 0, DB_HASH);
+                        if (rc != 0) {
+                                log_write("Error opening ost_data_file %s: rc %d\n",
+                                        ost_files[i], rc);
+                                goto out;
+                        }
+                        memset(&key, 0, sizeof(key));
+                        memset(&data, 0, sizeof(data));
+                        ost_hdr->ost_magic = OST_MAGIC;
+                        key.data = &ost_hdr->ost_magic;
+                        key.size = sizeof(ost_hdr->ost_magic);
+                        data.size = data.ulen = sizeof(*ost_hdr);
+                        data.data = ost_hdr;
+                        data.flags = DB_DBT_USERMEM;
+
+                        rc = ost_db->get(ost_db, NULL, &key, &data, 0);
+                        ost_db->close(ost_db, 0);
+                        ost_db = NULL;
+                        if (rc != 0) {
+                                log_write("Invalid ost magic on file %s: rc %s\n",
+                                          ost_files[i], db_strerror(rc));
+                                continue;
+                        }
+
+                        letocpu_ost_hdr(ost_hdr);
+                        VERBOSE(2, "%s has ost UUID %s\n", ost_files[i],
+                                ost_hdr->ost_uuid.uuid);
+
+                        if (obd_uuid_equals(&lfsck_uuid[ost_idx], &ost_hdr->ost_uuid)) {
+                                if (ost_hdr->ost_index != ost_idx) {
+                                        log_write("Requested ost_idx %u doesn't match "
+                                                  "index %u found in %s\n", ost_idx,
+                                                  ost_hdr->ost_index, ost_files[i]);
+                                        continue;
+                                }
+
+                                break;
+                        }
+                }
+
+                if (i == num_ost_files) {
+                        /* i indexes ost_files[] here; the MDT being checked is j */
+                        log_write("lfsck: can't find file for ost_idx %d\n", ost_idx);
+                        rc = lfsck_list_affected_files(mdt_files[j], mdt_hdr, mdt_info[j].mdt_direntdb, ost_idx);
+                        goto out;
+                }
+                
+                sprintf(ost_dbname, "%s.%d", OST_OSTDB, mdt_hdr->mds_index); 
+                rc = lfsck_opendb(ost_files[i], ost_dbname, &ost_db, 0, 0, 0, DB_HASH);
+                if (rc != 0) {
+                        log_write("error opening ost_data_file %s: rc %d\n",
+                                ost_files[i], rc);
+                        goto out;
+                }
+
+                /* FIXME */
+                VERBOSE(1, "MDS%d: max_id "LPU64" OST: max_id "LPU64"\n", mdt_hdr->mds_index,
+                        mdt_info[j].mdt_hdr->mds_max_ost_id[ost_idx], ost_hdr->ost_last_id[0]);
+
+                rc = lfsck_run_pass1(ost_idx, mds_ostdb, ost_db, mdt_info[j].mdt_direntdb);
+                if (rc != 0) {
+                        log_write("error in running pass1\n");
+                        goto out;
+                }
+
+                rc = lfsck_run_pass2(ost_idx, mdt_info[j].mdt_hdr, mds_ostdb, ost_db, 
+                                     mdt_info[j].mdt_direntdb, mdt_info[j].mdt_sizeinfodb);
+                if (rc != 0) {
+                        log_write("error in running pass2\n");
+                        goto out;
+                }
+
+                /* FIXME */
+                last_id = (ost_hdr->ost_flags & E2F_OPT_READONLY ||
+                           mdt_info[j].mdt_hdr->mds_flags & E2F_OPT_READONLY) ?
+                                mdt_info[j].mdt_hdr->mds_max_ost_id[ost_idx] : 
+                                                        ost_hdr->ost_last_id[0];
+
+                rc = lfsck_run_pass3(ost_idx, mds_ostdb, ost_db, ost_hdr->ost_uuid,
+                                     last_id);
+                if (rc != 0) {
+                        log_write("error in running pass3\n");
+                        goto out;
+                }
+		mds_ostdb->close(mds_ostdb, 0);
+                ost_db->close(ost_db, 0);
+                rc = 0;
+                mds_ostdb = ost_db = NULL;
+        }
 out:
+        if (mdt_hdr)
+                free(mdt_hdr);
 	if (ost_hdr)
 		free(ost_hdr);
 	if (mds_ostdb)
 		mds_ostdb->close(mds_ostdb, 0);
 	if (ost_db)
 		ost_db->close(ost_db, 0);
+        if (mdt_hdrdb)
+                mdt_hdrdb->close(mdt_hdrdb, 0);
 
 	return(rc);
 }
@@ -1465,14 +1549,14 @@ void llapi_cancel_osc_locks(const char *
 /* Duplicate an object that is referenced by multiple files and point one
  * of the files to use the duplicated object */
 int lfsck_fix_duplicate(__u64 mds_fid, __u32 mds_generation,
-			__u32 ost_idx, __u64 ost_objid, DB *mds_direntdb)
+			__u32 ost_idx, __u64 ost_objid, DB *mdt_direntdb)
 {
 	char path_tmp[PATH_MAX] = { 0 }, path[PATH_MAX] = { 0 };
 	char tmp[PATH_MAX * 2 + 10] = { 0 };
 	const char *base;
 	int rc;
 
-	if (lfsck_get_path(mds_fid, mds_direntdb, path, sizeof(path))) {
+	if (lfsck_get_path(mds_fid, mdt_direntdb, path, sizeof(path))) {
 		log_write("%s: [%u]: failed to locate FID "LPU64
 			  " duplicate objid "LPU64"\n", progname,
 			  ost_idx, mds_fid, ost_objid);
@@ -1546,12 +1630,11 @@ out:
  * Check for files found that reference the same ost objects
  * (found in pass1) and repair now if necessary
  */
-int lfsck_run_pass4(DB *mds_direntdb)
+int lfsck_run_pass4(DB *mdt_direntdb)
 {
 	char tmp[PATH_MAX + 512];
 	int i, j;
 
-	log_write("lfsck: pass4: check for duplicate object references\n");
 	if (lfsck_dup_saved == 0) {
 		log_write("lfsck: pass4 OK (no duplicates)\n");
 		return(0);
@@ -1565,7 +1648,7 @@ int lfsck_run_pass4(DB *mds_direntdb)
 					lfsck_duplicates[i].mds_generation,
 					lfsck_duplicates[i].ost_idx,
 					lfsck_duplicates[i].objid,
-					mds_direntdb)) {
+					mdt_direntdb)) {
 			fix_failed++;
 		}
 
@@ -1592,7 +1675,7 @@ int lfsck_run_pass4(DB *mds_direntdb)
  * This is a placeholder to check for filesize correctness no fixup is in
  * place right now since file size is still obtained from osts
  */
-int lfsck_run_pass5(DB *mds_direntdb, DB *mds_sizeinfodb)
+int lfsck_run_pass5(DB *mdt_direntdb, DB *mdt_sizeinfodb)
 {
 	int rc = 0;
 #ifdef CHECK_SIZE
@@ -1602,9 +1685,8 @@ int lfsck_run_pass5(DB *mds_direntdb, DB
 	DBT key,data;
 	DBC *dbcp;
 
-	log_write("lfsck: pass5: file size correctness\n");
 
-	if ((rc = mds_sizeinfodb->cursor(mds_sizeinfodb, NULL, &dbcp, 0)) != 0){
+	if ((rc = mdt_sizeinfodb->cursor(mdt_sizeinfodb, NULL, &dbcp, 0)) != 0){
 		log_write("%s: error acquiring cursor for database: %s\n",
 			  progname, db_strerror(rc));
 		rc = -EINVAL;
@@ -1619,7 +1701,7 @@ int lfsck_run_pass5(DB *mds_direntdb, DB
 		letocpu_mds_szinfo(&mds_szinfo1);
 
 		if (mds_szinfo1.mds_size != mds_szinfo1.mds_calc_size) {
-			if (lfsck_get_path(mds_szinfo1.mds_fid, mds_direntdb,
+			if (lfsck_get_path(mds_szinfo1.mds_fid, mdt_direntdb,
 					   path, sizeof(path))) {
 				log_write("%s: failed to get path and update "
 					  "size for fid "LPU64"\n",
@@ -1700,8 +1782,7 @@ void *lfsck_start_thread(void *arg)
 	if (!all_started)
 		pthread_exit(NULL);
 	for (i = tinfo->start_ost_idx; i < tinfo->end_ost_idx; i++) {
-		rc = run_test(i, tinfo->mds_hdr, tinfo->mds_direntdb,
-			      tinfo->mds_sizeinfodb);
+		rc = run_test(i, tinfo->mds_info);
 		if (rc) {
 			log_write("lfsck: ost_idx %d: error running check\n",i);
 			tinfo->status = rc;
@@ -1710,83 +1791,342 @@ void *lfsck_start_thread(void *arg)
 	pthread_exit(NULL);
 }
 
+/* Scan the mdt_fldb table of every MDT for a range record satisfying
+ * lsr_start <= fid->f_seq < lsr_end.  Returns that record's lsr_index,
+ * or -1 when no table holds a matching range. */
+static int lfsck_get_mdt_index(struct lfsck_mdt_check_info *mdt_info,
+			       struct lu_fid *fid)
+{
+	DBT key, data;
+	int i, rc = 0;
+	struct lfsck_mds_fldb range;
+
+	for (i = 0; i < num_mdt_files; i++) {
+		DB *fldb = mdt_info[i].mdt_fldb;
+		DBC *dbcp;
+
+		memset(&key, 0, sizeof(key));
+		memset(&data, 0, sizeof(data));
+		data.data = &range;
+		data.size = data.ulen = sizeof(range);
+		data.flags = DB_DBT_USERMEM;
+		rc = fldb->cursor(fldb, NULL, &dbcp, 0);
+		if (rc) {
+			log_write("open mdt_fldb %s failed %d \n", mdt_files[i], rc);
+			continue;
+		}
+		rc = dbcp->c_get(dbcp, &key, &data, DB_FIRST);
+		if (rc) {
+			log_write("get dbcp %s failed %s \n", mdt_files[i], db_strerror(rc));
+			dbcp->c_close(dbcp);
+			continue;
+		}
+		do {	/* test the DB_FIRST record as well before advancing */
+			if (fid->f_seq >= range.lsr_start && fid->f_seq < range.lsr_end) {
+				dbcp->c_close(dbcp);
+				return range.lsr_index;
+			}
+		} while ((rc = dbcp->c_get(dbcp, &key, &data, DB_NEXT)) == 0);
+		dbcp->c_close(dbcp);
+	}
+
+	return -1;
+}
+
+/* Look @fid up in the mdt_oidb of the first MDT whose header mds_index equals
+ * @index; returns the DB->get() result, or -1 when no MDT has that index. */
+static int lfsck_locate_fid_in_oi(struct lfsck_mdt_check_info *mdt_info,
+				  int index, struct lu_fid *fid)
+{
+	struct osd_inode_id ino;
+	DBT oi_key, oi_rec;
+	DB *oi_table;
+	int n;
+
+	for (n = 0; n < num_mdt_files; n++) {
+		if (mdt_info[n].mdt_hdr->mds_index != index)
+			continue;
+		oi_table = mdt_info[n].mdt_oidb;
+		memset(&oi_key, 0, sizeof(oi_key));
+		oi_key.data = fid;
+		oi_key.size = sizeof(*fid);
+		memset(&oi_rec, 0, sizeof(oi_rec));
+		oi_rec.data = &ino;
+		oi_rec.size = oi_rec.ulen = sizeof(ino);
+		oi_rec.flags = DB_DBT_USERMEM;
+		return oi_table->get(oi_table, NULL, &oi_key, &oi_rec, 0);
+	}
+	return -1;
+}
+
+/* Verify @fid: map it to an MDT index through the FLDB tables, then look it
+ * up in that MDT's OI table.  Returns 0 when found, non-zero otherwise. */
+static int lfsck_locate_fid(struct lfsck_mdt_check_info *mdt_info,
+			    struct lu_fid *fid)
+{
+	int index;
+
+	index = lfsck_get_mdt_index(mdt_info, fid);
+	if (index == -1) {
+		/* no FLDB range on any MDT covers fid->f_seq */
+		log_write("can not locate "DFID" in fldb \n", PFID(fid));
+		return -1;
+	}
+
+	/* Locate fid in oidb database */
+	return lfsck_locate_fid_in_oi(mdt_info, index, fid);
+}
+
+/* MDT pass1: for every MDT, check that each FID record read from its
+ * mdt_dfiddb table can be located with lfsck_locate_fid().  Prints a per-MDT
+ * verdict to stderr; returns 0 if every MDT passed, -1 otherwise. */
+static int lfsck_mdt_check_pass1(struct lfsck_mdt_check_info *mdt_info)
+{
+	int failed = 0;	/* set once any MDT fails */
+	int i;
+
+	for (i = 0; i < num_mdt_files; i++) {
+		DB *mdt_dfiddb = mdt_info[i].mdt_dfiddb;
+		struct lu_fid mdt_fid;
+		DBT key, data;
+		DBC *dbcp;
+		int mdt_failed = 0;	/* verdict for this MDT only */
+		int rc;
+
+		rc = mdt_dfiddb->cursor(mdt_dfiddb, NULL, &dbcp, 0);
+		if (rc != 0) {
+			log_write("[%d]: error acquiring cursor for mds dfid table: %s\n",
+				  i, db_strerror(rc));
+			failed = 1;	/* a table that cannot be read is not a pass */
+			continue;
+		}
+
+		memset(&key, 0, sizeof(key));
+		memset(&data, 0, sizeof(data));
+		data.data = &mdt_fid;
+		data.size = data.ulen = sizeof(mdt_fid);
+		data.flags = DB_DBT_USERMEM;
+		/* each record value is read into mdt_fid and then verified */
+		while ((rc = dbcp->c_get(dbcp, &key, &data, DB_NEXT)) == 0) {
+			if (lfsck_locate_fid(mdt_info, &mdt_fid) != 0) {
+				mdt_failed = 1;
+				fprintf(stderr, "Can not verify "DFID": \n", PFID(&mdt_fid));
+			}
+		}
+		dbcp->c_close(dbcp);
+
+		fprintf(stderr, "MDS%d OIDB check %s \n", mdt_info[i].mdt_hdr->mds_index,
+			mdt_failed ? "failed" : "pass");
+		failed |= mdt_failed;
+	}
+
+	return failed ? -1 : 0;
+}
+
+/* Walk every lfsck_mds_stripe_ent record of @dbp and verify that both its
+ * mds_fid and mds_mfid can be located with lfsck_locate_fid().  @dbp is never
+ * closed here: the caller owns it.  Returns 0 if all records verify, the DB
+ * error if no cursor could be obtained, or -1 if any FID was not located. */
+static int lfsck_mdt_check_stripe_ent(struct lfsck_mdt_check_info *mdt_info, DB *dbp)
+{
+	struct lfsck_mds_stripe_ent mdt_ent;
+	struct lu_fid fid;
+	DBC *dbcp;
+	DBT key, data;
+	int rc;
+	int failed = 0;
+
+	rc = dbp->cursor(dbp, NULL, &dbcp, 0);
+	if (rc) {
+		log_write("Get cursor failed %s \n", db_strerror(rc));
+		return rc;	/* dbp is left for the caller to close */
+	}
+
+	memset(&key, 0, sizeof(key));
+	memset(&data, 0, sizeof(data));
+
+	key.data = &fid;
+	key.size = sizeof(fid);
+	data.data = &mdt_ent;
+	data.size = data.ulen = sizeof(mdt_ent);
+	data.flags = DB_DBT_USERMEM;
+
+	while ((rc = dbcp->c_get(dbcp, &key, &data, DB_NEXT)) == 0) {
+		if (lfsck_locate_fid(mdt_info, &mdt_ent.mds_fid) != 0) {
+			failed = 1;
+			fprintf(stderr, "MDT can not verify idx %d "DFID":"DFID": \n",
+				mdt_ent.mds_mdtidx, PFID(&mdt_ent.mds_mfid),
+				PFID(&mdt_ent.mds_fid));
+		}
+		if (lfsck_locate_fid(mdt_info, &mdt_ent.mds_mfid) != 0) {
+			failed = 1;
+			fprintf(stderr, "MDT can not verify idx %d "DFID":"DFID": \n",
+				mdt_ent.mds_mdtidx, PFID(&mdt_ent.mds_mfid),
+				PFID(&mdt_ent.mds_fid));
+		}
+	}
+	dbcp->c_close(dbcp);
+
+	return failed ? -1 : 0;
+}
+
+/* Open table @dbname in every MDT database file and run
+ * lfsck_mdt_check_stripe_ent() on it.  Every table opened here is also
+ * closed here, whether or not its check succeeded.  Logs one overall
+ * verdict; returns 0 if all MDTs passed, -1 otherwise. */
+static int lfsck_mdt_check_stripedb(struct lfsck_mdt_check_info *mdt_info,
+				    char *dbname)
+{
+	int i, failed = 0;
+	DB *dbp;
+
+	for (i = 0; i < num_mdt_files; i++) {
+		int rc;
+
+		rc = lfsck_opendb(mdt_files[i], dbname, &dbp, 1, 0, 0, DB_HASH);
+		if (rc) {
+			failed = 1;
+			continue;
+		}
+
+		/* close unconditionally: a failed check must not leak the handle */
+		rc = lfsck_mdt_check_stripe_ent(mdt_info, dbp);
+		dbp->close(dbp, 0);
+		if (rc)
+			failed = 1;
+	}
+
+	if (failed)
+		log_write("MDT striped check failed. \n");
+	else
+		log_write("MDT striped check pass! \n");
+
+	return failed ? -1 : 0;
+}
+
+/* MDT pass2: verify the stripe entries stored in each MDT's MDS_MDTDB table */
+static int lfsck_mdt_check_pass2(struct lfsck_mdt_check_info *mdt_info)
+{
+	return lfsck_mdt_check_stripedb(mdt_info, MDS_MDTDB);
+}
+
+/* MDT pass3: verify the stripe entries stored in each MDT's MDS_OBJDB table */
+static int lfsck_mdt_check_pass3(struct lfsck_mdt_check_info *mdt_info)
+{
+	return lfsck_mdt_check_stripedb(mdt_info, MDS_OBJDB);
+}
+
+/* Run the three cross-MDT passes (skipped when there is at most one MDT);
+ * all passes run even if one fails.  Returns 0 if all passed, else -1. */
+static int lfsck_check_mdts(struct lfsck_mdt_check_info *mdt_info)
+{
+	int failed = 0;
+
+	/* For single MDT, just return */
+	if (num_mdt_files <= 1)
+		return 0;
+
+	log_write("mds_lfsck: pass1: check cross-ref inode of MDT\n");
+	failed |= lfsck_mdt_check_pass1(mdt_info) != 0;
+	log_write("mds_lfsck: pass2: check for striped_dir MDT\n");
+	failed |= lfsck_mdt_check_pass2(mdt_info) != 0;
+	log_write("mds_lfsck: pass3: check for slave objects MDT\n");
+	failed |= lfsck_mdt_check_pass3(mdt_info) != 0;
+	return failed ? -1 : 0;
+}
+
 /* Start threads and run filesystem checks and repair */
 int lfsck_run_checks()
 {
-	struct lfsck_mds_hdr *mds_hdr = NULL;
+        struct lfsck_mdt_check_info mdt_info[LMV_MAX_MDTS] = { {NULL} };
 	struct lfsck_thread_info *tinfo = NULL;
 	pthread_t *threads = NULL;
 	int rc, i;
-	DB *mds_direntdb = NULL;
-	DB *mds_hdrdb = NULL;
-	DB *mds_sizeinfodb = NULL;
-	DBT key, data;
-	int num_osts;
+	int num_osts = 0;
 
-	rc = lfsck_opendb(mds_file, MDS_HDR, &mds_hdrdb, 0, 0, 0);
-	if (rc != 0) {
-		log_write("%s: error opening mds_hdr in %s: rc %d\n",
-			  mds_file, rc);
-		return(-EINVAL);
-	}
-	mds_hdr = malloc(sizeof(*mds_hdr));
-	if (mds_hdr == NULL) {
-		log_write("%s: out of memory allocating DB header (%u)\n",
-			  progname, sizeof(*mds_hdr));
-		rc = -ENOMEM;
-		goto out;
-	}
-	memset(&key, 0, sizeof(key));
-	memset(&data, 0, sizeof(data));
-	mds_hdr->mds_magic = MDS_MAGIC;
-	key.data = &mds_hdr->mds_magic;
-	key.size = sizeof(mds_hdr->mds_magic);
-	data.data = mds_hdr;
-	data.size = sizeof(*mds_hdr);
-	data.ulen = sizeof(*mds_hdr);
-	data.flags = DB_DBT_USERMEM;
-	rc = mds_hdrdb->get(mds_hdrdb, NULL, &key, &data, 0);
-	if (rc != 0) {
-		log_write("%s: error getting mds_hdr info %s: %s\n",
-			  progname, mds_file, db_strerror(rc));
-		goto out;
-	}
-	letocpu_mds_hdr(mds_hdr);
-
-	rc = lfsck_opendb(mds_file, MDS_DIRINFO, &mds_direntdb, 0, 0, 0);
-	if (rc != 0) {
-		log_write("%s: error opening dirinfo db %s\n",
-			  progname, mds_file);
-		goto out;
-	}
-
-	rc = lfsck_opendb(mds_file, MDS_SIZEINFO, &mds_sizeinfodb, 0, 0, 0);
-	if (rc != 0) {
-		log_write("%s: error opening sizeinfo db %s\n",
-			  progname, mds_file);
-		goto out;
-	}
-
-	if (lov_tgt_count > mds_hdr->mds_num_osts) {
-		fprintf(stderr, "%s: number of osts in lov (%u) > "
-				"num referenced in mds (%u) (new ost or "
-				"empty filesystem?)\n", progname,
-				lov_tgt_count, mds_hdr->mds_num_osts);
-		fprintf(stderr, "Do you wish to continue? (y/n)\n");
-		if ((rc = get_response()) != 1) {
-			log_write("%s: exiting \n", progname);
-			goto out;
-		}
-		fprintf(stderr, "\n");
-
-		num_osts = lov_tgt_count;
-	} else {
-		num_osts = mds_hdr->mds_num_osts;
-	}
-	if (num_threads > num_osts)
-		num_threads = num_osts;
-
-	tinfo = calloc(num_threads, sizeof(*tinfo));
+        /* Check MDTs */
+        for (i = 0; i < num_mdt_files; i++) {
+                rc = lfsck_opendb(mdt_files[i], MDS_HDR, &mdt_info[i].mdt_hdrdb,
+                                  0, 0, 0, DB_HASH);
+                if (rc != 0) {
+                        log_write("error opening mdt_hdr in %s: rc %d\n", mdt_files[i], rc);
+                        rc = -EINVAL;   /* leave through out: like the other failures */
+                        goto out;
+                }
+                rc = lfsck_opendb(mdt_files[i], MDS_FLDB, &mdt_info[i].mdt_fldb,
+                                  0, 0, 0, DB_BTREE);
+                if (rc) {
+                        log_write("error opening mds_fldb in %s: rc %d \n", mdt_files[i], rc);
+                        goto out;
+                }
+                rc = lfsck_opendb(mdt_files[i], MDS_OIDB, &mdt_info[i].mdt_oidb,
+                                  0, 0, 0, DB_HASH);
+                if (rc) {
+                        log_write("error opening mds_oidb in %s: rc %d \n", mdt_files[i], rc);
+                        goto out;
+                }
+                rc = lfsck_opendb(mdt_files[i], MDS_DFIDDB, &mdt_info[i].mdt_dfiddb,
+                                  0, 0, 0, DB_HASH);
+                if (rc) {
+                        log_write("error opening mds_dfiddb in %s: rc %d \n",
+                                  mdt_files[i], rc);
+                        goto out;
+                }
+                mdt_info[i].mdt_hdr = malloc(sizeof(struct lfsck_mds_hdr));
+                if (mdt_info[i].mdt_hdr == NULL) {
+                        log_write("%s: out of memory allocating DB header (%u)\n",
+                                  progname, (unsigned int)sizeof(struct lfsck_mds_hdr));
+                        rc = -ENOMEM;
+                        goto out;
+                }
+                rc = lfsck_get_mdt_hdr(mdt_info[i].mdt_hdrdb, mdt_info[i].mdt_hdr);
+                if (rc != 0)    /* header was not read: its fields must not be used */
+                        goto out;
+                if (lov_tgt_count > mdt_info[i].mdt_hdr->mds_num_osts) {
+                        fprintf(stderr, "%s: number of osts in lov (%u) > "
+                                        "num referenced in mds (%u) (new ost or "
+                                        "empty filesystem?)\n", progname,
+                                        lov_tgt_count, mdt_info[i].mdt_hdr->mds_num_osts);
+                        fprintf(stderr, "Do you wish to continue? (y/n)\n");
+                        if ((rc = get_response()) != 1) {
+                                log_write("%s: exiting \n", progname);
+                                goto out;
+                        }
+                        fprintf(stderr, "\n");
+                        num_osts = lov_tgt_count;
+                } else {
+                        num_osts = mdt_info[i].mdt_hdr->mds_num_osts;
+                }
+                if (num_threads > num_osts)
+                        num_threads = num_osts;
+        }
+
+        rc = lfsck_check_mdts(&mdt_info[0]);
+
+        for (i = 0; i < num_mdt_files; i++) {
+                if (mdt_info[i].mdt_fldb != NULL) { 
+                        mdt_info[i].mdt_fldb->close(mdt_info[i].mdt_fldb, 0);
+                        mdt_info[i].mdt_fldb = NULL;
+                }
+                if (mdt_info[i].mdt_oidb != NULL) {
+                        mdt_info[i].mdt_oidb->close(mdt_info[i].mdt_oidb, 0);
+                        mdt_info[i].mdt_oidb = NULL;
+                }
+                if (mdt_info[i].mdt_dfiddb != NULL) {
+                        mdt_info[i].mdt_dfiddb->close(mdt_info[i].mdt_dfiddb, 0);
+                        mdt_info[i].mdt_dfiddb = NULL;
+                }
+                if (mdt_info[i].mdt_hdrdb != NULL) {
+                        mdt_info[i].mdt_hdrdb->close(mdt_info[i].mdt_hdrdb, 0);
+                        mdt_info[i].mdt_hdrdb = NULL;
+                }
+        }
+        if (rc)
+                fprintf(stderr, "MDS-MDS consistency check failed \n");
+        else
+                fprintf(stderr, "MDS-MDS consistency check succeed \n");
+ 
+        tinfo = calloc(num_threads, sizeof(*tinfo));
 	if (tinfo == NULL) {
 		log_write("%s: out of memory for thread info\n", progname);
 		rc = -ENOMEM;
@@ -1799,6 +2139,23 @@ int lfsck_run_checks()
 		goto out;
 	}
 
+        /* Open every MDT's dirent and size-info databases before the worker threads start. */
+        for (i = 0; i < num_mdt_files; i++) {
+                rc = lfsck_opendb(mdt_files[i], MDS_DIRINFO, &mdt_info[i].mdt_direntdb,
+                                  0, 0, 0, DB_HASH);
+                if (rc != 0) {
+                        log_write("%s: error opening dirinfo db %s: rc %d\n",
+                                  progname, mdt_files[i], rc);
+                        goto out;
+                }
+                rc = lfsck_opendb(mdt_files[i], MDS_SIZEINFO, &mdt_info[i].mdt_sizeinfodb,
+                                  0, 0, 0, DB_HASH);
+                if (rc != 0) {
+                        log_write("%s: error opening sizeinfo db %s: rc %d\n",
+                                  progname, mdt_files[i], rc);
+                        goto out;
+                }
+        }
 	all_started = 0;
 	for (i = 0; i < num_threads; i++) {
 		__u32 end_ost_idx;
@@ -1807,14 +2164,11 @@ int lfsck_run_checks()
 		chunk = num_osts / num_threads;
 		if (num_osts % num_threads)
 			chunk++;
-		tinfo[i].mds_hdr = mds_hdr;
-		tinfo[i].mds_direntdb = mds_direntdb;
-		tinfo[i].mds_sizeinfodb = mds_sizeinfodb;
+		tinfo[i].mds_info = &mdt_info[0];
 		tinfo[i].status = 0;
 		tinfo[i].start_ost_idx = (chunk) * i;
 		end_ost_idx = (chunk) * (i + 1);
-		end_ost_idx = end_ost_idx > num_osts ?
-			      num_osts : end_ost_idx;
+		end_ost_idx = end_ost_idx > num_osts ?  num_osts : end_ost_idx;
 		tinfo[i].end_ost_idx = end_ost_idx;
 		rc = pthread_create(&threads[i], NULL, lfsck_start_thread,
 				    &tinfo[i]);
@@ -1843,26 +2197,37 @@ int lfsck_run_checks()
 		}
 	}
 
-	rc = lfsck_run_pass4(mds_direntdb);
-	if (rc != 0)
-		goto out;
-
-	rc = lfsck_run_pass5(mds_direntdb, mds_sizeinfodb);
-
+        for (i = 0; i < num_mdt_files; i++) {
+                log_write("lfsck : pass4: check for duplicate object references MDT%d\n",
+                          mdt_info[i].mdt_hdr->mds_index);
+                rc = lfsck_run_pass4(mdt_info[i].mdt_direntdb);
+                if (rc != 0)
+                        goto out;
+                log_write("lfsck: pass5: file size correctness MDT%d\n", mdt_info[i].mdt_hdr->mds_index);
+                rc = lfsck_run_pass5(mdt_info[i].mdt_direntdb, mdt_info[i].mdt_sizeinfodb);
+                if (rc != 0)    /* otherwise the next MDT's result would overwrite this failure */
+                        goto out;
+        }
 out:
 	if (threads)
 		free(threads);
 	if (tinfo)
 		free(tinfo);
-	if (mds_hdr)
-		free(mds_hdr);
-	if (mds_direntdb)
-		mds_direntdb->close(mds_direntdb, 0);
-	if (mds_hdrdb)
-		mds_hdrdb->close(mds_hdrdb, 0);
-	if (mds_sizeinfodb)
-		mds_sizeinfodb->close(mds_sizeinfodb, 0);
 
+        for (i = 0; i < num_mdt_files; i++) {
+                if (mdt_info[i].mdt_fldb != NULL) 
+                        mdt_info[i].mdt_fldb->close(mdt_info[i].mdt_fldb, 0);
+                if (mdt_info[i].mdt_oidb != NULL)
+                        mdt_info[i].mdt_oidb->close(mdt_info[i].mdt_oidb, 0);
+                if (mdt_info[i].mdt_dfiddb != NULL)
+                        mdt_info[i].mdt_dfiddb->close(mdt_info[i].mdt_dfiddb, 0);
+                if (mdt_info[i].mdt_hdrdb != NULL)
+                        mdt_info[i].mdt_hdrdb->close(mdt_info[i].mdt_hdrdb, 0);
+	        if (mdt_info[i].mdt_direntdb)
+		        mdt_info[i].mdt_direntdb->close(mdt_info[i].mdt_direntdb, 0);
+       	        if (mdt_info[i].mdt_sizeinfodb)
+		        mdt_info[i].mdt_sizeinfodb->close(mdt_info[i].mdt_sizeinfodb, 0);
+        }
 	return(rc);
 }
 
@@ -1956,12 +2321,16 @@ int main(int argc, char *argv[])
 	if (lfsck_run_checks())
 		log_close(-1);
 
-	if (mds_file)
-		free(mds_file);
 	for (i = 0; i < LOV_MAX_OSTS; i++) {
 		if (ost_files[i])
 			free(ost_files[i]);
 	}
+
+	for (i = 0; i < LMV_MAX_MDTS; i++) {
+		if (mdt_files[i])
+			free(mdt_files[i]);
+	}
+
 	if (lfsck_duplicates)
 		free(lfsck_duplicates);
 
diff -up old/e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck_common.c ./e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck_common.c
--- old/e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck_common.c	2010-10-08 03:48:33.000000000 -0700
+++ ./e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck_common.c	2011-01-09 22:56:36.814242470 -0800
@@ -101,7 +101,8 @@ int lfsck_create_dbenv(const char *progn
 }
 
 int lfsck_opendb(const char *fname, const char *dbname, DB **dbpp,
-		 int allow_dup, int keydata_size, int num_files)
+		 int allow_dup, int keydata_size, int num_files, 
+		 DBTYPE type)
 {
 	static int dbenv_set = 0;
 	DB *dbp;
@@ -134,26 +135,27 @@ int lfsck_opendb(const char *fname, cons
 		return (EIO);
 	}
 
-	if (keydata_size && num_files) {
-		h_ffactor = (pagesize - 32) / (keydata_size + 8);
-		if ((rc = dbp->set_h_ffactor(dbp, h_ffactor)) != 0) {
-			dbp->err(dbp, rc, "set_h_ffactor");
-			dbp->close(dbp, 0);
-			return (EIO);
+	if (type == DB_HASH) {
+		if (keydata_size && num_files) {
+			h_ffactor = (pagesize - 32) / (keydata_size + 8);
+			if ((rc = dbp->set_h_ffactor(dbp, h_ffactor)) != 0) {
+				dbp->err(dbp, rc, "set_h_ffactor");
+				dbp->close(dbp, 0);
+				return (EIO);
+			}
+			if ((rc = dbp->set_h_nelem(dbp, num_files)) != 0 ) {
+				dbp->err(dbp, rc, "set_h_nelem");
+				dbp->close(dbp, 0);
+				return (EIO);
+			}
 		}
-		if ((rc = dbp->set_h_nelem(dbp, num_files)) != 0 ) {
-			dbp->err(dbp, rc, "set_h_nelem");
+
+		if ((rc = dbp->set_h_hash(dbp, lfsck_hash_fn)) != 0 ) {
+			dbp->err(dbp, rc, "set_h_hash");
 			dbp->close(dbp, 0);
 			return (EIO);
 		}
 	}
-
-	if ((rc = dbp->set_h_hash(dbp, lfsck_hash_fn)) != 0 ) {
-		dbp->err(dbp, rc, "set_h_hash");
-		dbp->close(dbp, 0);
-		return (EIO);
-	}
-
 	if (allow_dup) {
 		if((rc = dbp->set_flags(dbp, DB_DUPSORT)) != 0) {
 			fprintf(stderr, "Failure to allow duplicates\n");
@@ -161,12 +163,12 @@ int lfsck_opendb(const char *fname, cons
 			return (EIO);
 		}
 	}
-
+	
 #if (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR >= 1) || (DB_VERSION_MAJOR > 4)
-	if ((rc = dbp->open(dbp, NULL, fname, dbname, DB_HASH,
+	if ((rc = dbp->open(dbp, NULL, fname, dbname, type,
 			    DB_CREATE | DB_INIT_LOCK | DB_THREAD, 0664)) != 0)
 #else
-	if ((rc = dbp->open(dbp, fname, dbname, DB_HASH,
+	if ((rc = dbp->open(dbp, fname, dbname, type,
 			    DB_CREATE | DB_INIT_LOCK | DB_THREAD, 0664)) != 0)
 #endif
 	{
@@ -185,6 +187,7 @@ void cputole_mds_hdr(struct lfsck_mds_hd
 	mds_hdr->mds_flags = ext2fs_cpu_to_le64(mds_hdr->mds_flags);
 	mds_hdr->mds_max_files = ext2fs_cpu_to_le64(mds_hdr->mds_max_files);
 	mds_hdr->mds_num_osts = ext2fs_cpu_to_le64(mds_hdr->mds_num_osts);
+	mds_hdr->mds_index = ext2fs_cpu_to_le32(mds_hdr->mds_index);
 	for (i = 0; i < num_osts; i++) {
 		 mds_hdr->mds_max_ost_id[i] =
 			      ext2fs_cpu_to_le64(mds_hdr->mds_max_ost_id[i]);
@@ -199,6 +202,7 @@ void letocpu_mds_hdr(struct lfsck_mds_hd
 	mds_hdr->mds_flags = ext2fs_le64_to_cpu(mds_hdr->mds_flags);
 	mds_hdr->mds_max_files = ext2fs_le64_to_cpu(mds_hdr->mds_max_files);
 	mds_hdr->mds_num_osts = ext2fs_le64_to_cpu(mds_hdr->mds_num_osts);
+	mds_hdr->mds_index = ext2fs_le32_to_cpu(mds_hdr->mds_index); /* __u32 field */
 	for (i = 0; i < mds_hdr->mds_num_osts; i ++) {
 		mds_hdr->mds_max_ost_id[i] =
 			     ext2fs_le64_to_cpu(mds_hdr->mds_max_ost_id[i]);
@@ -207,18 +211,24 @@ void letocpu_mds_hdr(struct lfsck_mds_hd
 
 void cputole_ost_hdr(struct lfsck_ost_hdr *ost_hdr)
 {
+	int i;
 	ost_hdr->ost_magic = ext2fs_cpu_to_le64(ost_hdr->ost_magic);
 	ost_hdr->ost_flags = ext2fs_cpu_to_le64(ost_hdr->ost_flags);
 	ost_hdr->ost_num_files = ext2fs_cpu_to_le64(ost_hdr->ost_num_files);
-	ost_hdr->ost_last_id = ext2fs_cpu_to_le64(ost_hdr->ost_last_id);
+	for (i = 0; i < ost_hdr->ost_mds_num; i++)
+		ost_hdr->ost_last_id[i] = ext2fs_cpu_to_le64(ost_hdr->ost_last_id[i]);
+	ost_hdr->ost_mds_num = ext2fs_cpu_to_le32(ost_hdr->ost_mds_num);
 }
 
 void letocpu_ost_hdr(struct lfsck_ost_hdr *ost_hdr)
 {
+	int i;
 	ost_hdr->ost_magic = ext2fs_le64_to_cpu(ost_hdr->ost_magic);
 	ost_hdr->ost_flags = ext2fs_le64_to_cpu(ost_hdr->ost_flags);
 	ost_hdr->ost_num_files = ext2fs_le64_to_cpu(ost_hdr->ost_num_files);
-	ost_hdr->ost_last_id = ext2fs_le64_to_cpu(ost_hdr->ost_last_id);
+	ost_hdr->ost_mds_num = ext2fs_le32_to_cpu(ost_hdr->ost_mds_num);
+	for (i = 0; i < LMV_MAX_MDTS && i < ost_hdr->ost_mds_num; i++) /* stay inside ost_last_id[] */
+		ost_hdr->ost_last_id[i] = ext2fs_le64_to_cpu(ost_hdr->ost_last_id[i]);
 }
 
 void cputole_mds_dirent(struct lfsck_mds_dirent *mds_dirent)
@@ -281,6 +291,14 @@ void letocpu_mds_objent(struct lfsck_mds
 	mds_objent->mds_ostoffset = ext2fs_le32_to_cpu(mds_objent->mds_ostoffset);
 }
 
+void cputole_mds_stripe_ent(struct lfsck_mds_stripe_ent *mds_objent)
+{	/* Swabs mds_fid and mds_mdtidx to little-endian in place; mds_mfid is not touched. */
+	mds_objent->mds_fid.f_seq = ext2fs_cpu_to_le64(mds_objent->mds_fid.f_seq);
+	mds_objent->mds_fid.f_oid = ext2fs_cpu_to_le32(mds_objent->mds_fid.f_oid);
+	mds_objent->mds_fid.f_ver = ext2fs_cpu_to_le32(mds_objent->mds_fid.f_ver);
+	mds_objent->mds_mdtidx = ext2fs_cpu_to_le32(mds_objent->mds_mdtidx);
+}
+
 void cputole_ost_objent(struct lfsck_ost_objent *ost_objent)
 {
 	ost_objent->ost_objid = ext2fs_cpu_to_le64(ost_objent->ost_objid);
@@ -307,7 +325,7 @@ void letocpu_lov_user_md(struct lov_user
 	lmm->lmm_magic = ext2fs_le32_to_cpu(lmm->lmm_magic);
 	lmm->lmm_pattern = ext2fs_le32_to_cpu(lmm->lmm_pattern);
 	lmm->lmm_object_id = ext2fs_le64_to_cpu(lmm->lmm_object_id);
-	lmm->lmm_object_gr = ext2fs_le64_to_cpu(lmm->lmm_object_gr);
+	lmm->lmm_object_seq = ext2fs_le64_to_cpu(lmm->lmm_object_seq);
 	lmm->lmm_stripe_size = ext2fs_le32_to_cpu(lmm->lmm_stripe_size);
 	lmm->lmm_stripe_count = ext2fs_le16_to_cpu(lmm->lmm_stripe_count);
 	/* No swabbing needed for the lov_user_md_v3 lmm_pool_name */
@@ -321,9 +339,33 @@ void letocpu_lov_user_md(struct lov_user
 
 	for (i = 0; i < lmm->lmm_stripe_count; i++, loi++) {
 		loi->l_object_id = ext2fs_le64_to_cpu(loi->l_object_id);
-		loi->l_object_gr = ext2fs_le64_to_cpu(loi->l_object_gr);
+		loi->l_object_seq = ext2fs_le64_to_cpu(loi->l_object_seq);
 		loi->l_ost_gen = ext2fs_le32_to_cpu(loi->l_ost_gen);
 		loi->l_ost_idx = ext2fs_le32_to_cpu(loi->l_ost_idx);
 	}
 }
+
+void letocpu_lmv_user_md(struct lmv_user_md *lmm)
+{
+	struct lmv_user_mds_data *lmi;
+	int i;
+	/* Convert an LMV user EA from little-endian to CPU order, in place. */
+	lmm->lum_magic = ext2fs_le32_to_cpu(lmm->lum_magic);
+	lmm->lum_stripe_count = ext2fs_le32_to_cpu(lmm->lum_stripe_count);
+	lmm->lum_stripe_offset = ext2fs_le32_to_cpu(lmm->lum_stripe_offset);
+	lmm->lum_hash_type = ext2fs_le32_to_cpu(lmm->lum_hash_type);
+	lmm->lum_type = ext2fs_le32_to_cpu(lmm->lum_type);
+	/* Per-stripe entries: each FID and its MDT index are converted as well. */
+	lmi = lmm->lum_objects;
+	/* If there is a bad magic, this will be found immediately in the
+	 * call to lfsck_check_lmv_ea() following this function. */
+
+	for (i = 0; i < lmm->lum_stripe_count; i++, lmi++) {
+		lmi->lum_fid.f_seq = ext2fs_le64_to_cpu(lmi->lum_fid.f_seq);
+		lmi->lum_fid.f_oid = ext2fs_le32_to_cpu(lmi->lum_fid.f_oid);
+		lmi->lum_fid.f_ver = ext2fs_le32_to_cpu(lmi->lum_fid.f_ver);
+		lmi->lum_mds = ext2fs_le32_to_cpu(lmi->lum_mds);
+	}
+}
+
 #endif
Only in ./e2fsprogs-1.41.12.2.ora1/e2fsck/: lfsck_common.o
diff -up old/e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck.h ./e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck.h
--- old/e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck.h	2010-10-08 03:48:33.000000000 -0700
+++ ./e2fsprogs-1.41.12.2.ora1/e2fsck/lfsck.h	2011-01-14 17:01:46.912165809 -0800
@@ -41,19 +41,27 @@
 #define LAST_ID "LAST_ID"
 #define LAST_RCVD "last_rcvd"
 #define LOV_OBJID "lov_objid"
+#define FIRST_MDT_GROUP			3
 
 #ifndef EXT3_XATTR_INDEX_TRUSTED        /* temporary until we hit l28 kernel */
 #define EXT3_XATTR_INDEX_TRUSTED        4
 #endif
 #define XATTR_LUSTRE_MDS_LOV_EA         "lov"
+#define XATTR_LUSTRE_MDS_LMV_EA         "lmv"
 
 /* Database names */
 #define MDS_HDR       "mdshdr"
 #define MDS_DIRINFO   "mds_dirinfo"
 #define MDS_SIZEINFO  "mds_sizeinfo"
 #define MDS_OSTDB     "mds_ostdb"
+#define MDS_MDTDB     "mds_mdtdb"
+#define MDS_FLDB      "mds_fldb"
+#define MDS_OIDB      "mds_oidb"
+#define MDS_OBJDB     "mds_obj"
 #define OST_HDR       "osthdr"
 #define OST_OSTDB     "ost_db"
+#define MDS_DFIDDB    "mds_dfiddb"
+
 
 #define MDS_MAGIC     0xDBABCD01
 #define OST_MAGIC     0xDB123402
@@ -68,6 +76,9 @@
 #define LOV_EA_SIZE(lum, num) (sizeof(*lum) + num * sizeof(*lum->lmm_objects))
 #define LOV_EA_MAX(lum) LOV_EA_SIZE(lum, LOV_MAX_OSTS)
 
+
+#define LMV_MAX_MDTS 	8
+
 /*XXX*/
 #define STRTOUL strtoul
 #define STRTOUL_MAX ULONG_MAX
@@ -99,6 +110,7 @@ struct lfsck_mds_hdr {
 	__u64 mds_max_files;
 	__u32 mds_num_osts;
 	__u32 mds_unused;
+	__u32 mds_index;
 	__u64 mds_max_ost_id[LOV_MAX_OSTS];
 	struct obd_uuid mds_uuid;
 	struct obd_uuid mds_ost_info[LOV_MAX_OSTS];
@@ -108,9 +120,10 @@ struct lfsck_ost_hdr  {
 	__u64 ost_magic;
 	__u64 ost_flags;
 	__u64 ost_num_files;
-	__u64 ost_last_id;
+	__u64 ost_last_id[LMV_MAX_MDTS];
 	__u32 ost_index;
 	__u32 ost_unused;
+	__u32 ost_mds_num;
 	struct obd_uuid ost_mds_uuid;
 	struct obd_uuid ost_uuid;
 };
@@ -143,6 +156,19 @@ struct lfsck_mds_objent {
 	__u32 mds_ostoffset;
 };
 
+struct lfsck_mds_stripe_ent {
+	__u32 mds_mdtidx;
+	struct lu_fid mds_mfid;
+	struct lu_fid mds_fid;
+};
+
+struct lfsck_mds_fldb {
+        __u64 lsr_start; 
+        __u64 lsr_end;
+        __u32 lsr_index; 
+        __u32 lsr_flags; 
+};
+
 struct lfsck_ost_objent {
 	__u64 ost_objid;
 	__u64 ost_group;
@@ -159,11 +185,20 @@ struct lfsck_ofile_ctx {
 
 struct lfsck_outdb_info {
 	__u32 ost_count;
+	__u32 mdt_count;
 	int have_ost_count;
 	DB *mds_sizeinfo_dbp;
+	DB *mds_dirfid_dbp;
+	DB *mds_dirstripe_dbp;
 	struct lfsck_ofile_ctx *ofile_ctx;
 };
 
+struct osd_inode_id {     
+        __u32 oii_ino; /* inode number */
+        __u32 oii_gen; /* inode generation */
+};
+
+typedef __u64 seqno_t;
 /* pass6.c */
 extern int e2fsck_lfsck_found_ea(e2fsck_t ctx, ext2_ino_t ino,
 				 struct ext2_inode_large *inode,
@@ -171,11 +206,15 @@ extern int e2fsck_lfsck_found_ea(e2fsck_
 extern int e2fsck_lfsck_flush_ea(e2fsck_t ctx);
 extern int e2fsck_lfsck_cleanupdb(e2fsck_t ctx);
 extern int e2fsck_lfsck_remove_pending(e2fsck_t ctx, char *block_buf);
+extern int e2fsck_lfsck_put_fid(e2fsck_t ctx, struct ext2_dir_entry_2 *de);
+extern int e2fsck_lfsck_flush_fid(e2fsck_t ctx);
 
 /* lfsck_common.c */
+
 extern int lfsck_create_dbenv(const char *progname);
 extern int lfsck_opendb(const char *fname, const char *dbname, DB **dbpp,
-			int allow_dup, int keydata_size, int num_files);
+			int allow_dup, int keydata_size, int num_files, 
+			DBTYPE type);
 extern void cputole_mds_hdr(struct lfsck_mds_hdr *mds_hdr);
 extern void letocpu_mds_hdr(struct lfsck_mds_hdr *mds_hdr);
 extern void cputole_ost_hdr(struct lfsck_ost_hdr *ost_hdr);
@@ -189,6 +228,9 @@ extern void letocpu_mds_objent(struct lf
 extern void cputole_ost_objent(struct lfsck_ost_objent *ost_objent);
 extern void letocpu_ost_objent(struct lfsck_ost_objent *ost_objent);
 extern void letocpu_lov_user_md(struct lov_user_md *lmm);
+extern void letocpu_lmv_user_md(struct lmv_user_md *lmv);
+
+extern void cputole_mds_stripe_ent(struct lfsck_mds_stripe_ent *mds_objent);
 
 #define MDS_START_DIRENT_TABLE sizeof(struct lfsck_mds_hdr)
 
diff -up old/e2fsprogs-1.41.12.2.ora1/e2fsck/pass1.c ./e2fsprogs-1.41.12.2.ora1/e2fsck/pass1.c
--- old/e2fsprogs-1.41.12.2.ora1/e2fsck/pass1.c	2010-10-08 03:48:33.000000000 -0700
+++ ./e2fsprogs-1.41.12.2.ora1/e2fsck/pass1.c	2011-01-15 20:50:43.366721207 -0800
@@ -521,8 +521,8 @@ static void check_inode_extra_space(e2fs
 	}
 
 	if (EXT4_FITS_IN_INODE(inode, inode, i_crtime) &&
-	    inode->i_crtime < sb->s_mkfs_time ||
-	    inode->i_crtime > ctx->now + ctx->now_tolerance)
+	    (inode->i_crtime < sb->s_mkfs_time ||
+	    inode->i_crtime > ctx->now + ctx->now_tolerance))
 		e2fsck_mark_inode_bad(ctx, pctx->ino, BADNESS_HIGH);
 
 	eamagic = IHDR(inode);
diff -up old/e2fsprogs-1.41.12.2.ora1/e2fsck/pass2.c ./e2fsprogs-1.41.12.2.ora1/e2fsck/pass2.c
--- old/e2fsprogs-1.41.12.2.ora1/e2fsck/pass2.c	2010-10-08 03:48:33.000000000 -0700
+++ ./e2fsprogs-1.41.12.2.ora1/e2fsck/pass2.c	2011-01-09 21:48:46.967778850 -0800
@@ -46,6 +46,7 @@
 #include "e2fsck.h"
 #include "problem.h"
 #include "dict.h"
+#include "lfsck.h"
 
 #ifdef NO_INLINE_FUNCS
 #define _INLINE_
@@ -141,6 +142,7 @@ void e2fsck_pass2(e2fsck_t ctx)
 
 	cd.pctx.errcode = ext2fs_dblist_iterate(fs->dblist, check_dir_block,
 						&cd);
+	e2fsck_lfsck_flush_fid(ctx);
 	if (ctx->flags & E2F_FLAG_SIGNAL_MASK || ctx->flags & E2F_FLAG_RESTART)
 		return;
 
@@ -377,12 +379,12 @@ int e2fsck_check_dirent_data(e2fsck_t ct
 		return 1;
 	}
 	if (de->file_type & ~EXT2_FT_MASK) {
-
 		if (de->rec_len >= EXT2_DIR_REC_LEN(de) ||
 		   (de->rec_len + offset == EXT2_BLOCK_SIZE(ctx->fs->super))) {
 			if (ext2_get_dirent_dirdata_size(de, EXT2_DIRENT_LUFID) ==
-			    EXT2_DIRENT_LUFID_SIZE)
+			    EXT2_DIRENT_LUFID_SIZE) {
 				return 0;
+			}
 		}
 		/* just clear dirent data flags for now, we should fix FID data
 		 * in lustre specific pass.
@@ -972,6 +974,9 @@ out_htree:
 		if (ret == 2)
 			dir_modified++;
 
+		if (ret == 0)
+			e2fsck_lfsck_put_fid(ctx, (struct ext2_dir_entry_2 *)dirent);
+
 		/*
 		 * Make sure the inode listed is a legal one.
 		 */
@@ -1052,7 +1057,7 @@ out_htree:
 			if (ctx->flags & E2F_FLAG_SIGNAL_MASK)
 				return DIRENT_ABORT;
 		}
-
+		
 		group = ext2fs_group_of_ino(fs, dirent->inode);
 		first_unused_inode = group * fs->super->s_inodes_per_group +
 					1 + fs->super->s_inodes_per_group -
diff -up old/e2fsprogs-1.41.12.2.ora1/e2fsck/pass6.c ./e2fsprogs-1.41.12.2.ora1/e2fsck/pass6.c
--- old/e2fsprogs-1.41.12.2.ora1/e2fsck/pass6.c	2010-10-08 03:48:33.000000000 -0700
+++ ./e2fsprogs-1.41.12.2.ora1/e2fsck/pass6.c	2011-01-16 10:11:50.223886520 -0800
@@ -52,8 +52,30 @@ struct lfsck_ost_ctx {
 	int		numfiles;
 	int		status;
 	__u64		max_objid;
+	__u64		mds_group;
 };
 
+static inline void e2fsck_fid_le_to_cpu(struct lu_fid *dst, struct lu_fid *src)
+{
+	dst->f_seq = ext2fs_le64_to_cpu(src->f_seq);
+	dst->f_oid = ext2fs_le32_to_cpu(src->f_oid);
+	dst->f_ver = ext2fs_le32_to_cpu(src->f_ver);
+}
+
+static inline void e2fsck_fid_cpu_to_le(struct lu_fid *dst, struct lu_fid *src)
+{
+	dst->f_seq = ext2fs_cpu_to_le64(src->f_seq);
+	dst->f_oid = ext2fs_cpu_to_le32(src->f_oid);
+	dst->f_ver = ext2fs_cpu_to_le32(src->f_ver);
+}
+
+static inline void e2fsck_fid_be_to_cpu(struct lu_fid *dst, struct lu_fid *src)
+{
+	dst->f_seq = ext2fs_be64_to_cpu(src->f_seq);
+	dst->f_oid = ext2fs_be32_to_cpu(src->f_oid);
+	dst->f_ver = ext2fs_be32_to_cpu(src->f_ver);
+}
+
 int e2fsck_lfsck_cleanupdb(e2fsck_t ctx)
 {
 	int i;
@@ -146,11 +168,17 @@ static int lfsck_write_mds_hdrinfo(e2fsc
 	DB *mds_hdrdb = NULL;
 	DBT key, data;
 	int rc = 0;
+	char *mdsdb;
 	int i;
 
 	mds_hdrname = e2fsck_allocate_memory(ctx, PATH_MAX,
 					   "mds_hdr filename");
-	sprintf(mds_hdrname, "%s.mdshdr",ctx->lustre_mdsdb);
+	/* lfsck_write_mds_hdrinfo can only be called when checking MDS,
+ 	 * and only one MDS can be checked each time, so 
+ 	 * lustre_mds_files == 1 */ 
+	assert(ctx->lustre_mds_files == 1);
+	mdsdb = ctx->lustre_mdsdb[0];
+	sprintf(mds_hdrname, "%s.mdshdr",mdsdb);
 
 	if (unlink(mds_hdrname)) {
 		if (errno != ENOENT) {
@@ -161,7 +189,7 @@ static int lfsck_write_mds_hdrinfo(e2fsc
 		}
 	}
 
-	rc = lfsck_opendb(mds_hdrname, MDS_HDR, &mds_hdrdb, 0, 0, 0);
+	rc = lfsck_opendb(mds_hdrname, MDS_HDR, &mds_hdrdb, 0, 0, 0, DB_HASH);
 	if (rc != 0) {
 		fprintf(stderr, "failure to open database for mdsdhr "
 			"info%s: %s\n", MDS_HDR, db_strerror(rc));
@@ -213,61 +241,148 @@ out:
 	return (rc);
 }
 
-static int e2fsck_lfsck_save_ea(e2fsck_t ctx, ext2_ino_t ino, __u32 generation,
-				struct lov_user_md *lmm)
+static int e2fsck_lfsck_init_oinfo(e2fsck_t ctx)
 {
 	ext2_filsys fs = ctx->fs;
-	struct lfsck_mds_szinfo szinfo;
-	struct lov_user_ost_data_v1 *loi;
-	__u64 mds_fid;
-	int rc, i;
-	DBT key, data;
-	DB *dbp;
+	int rc;
 	__u32 numfiles = fs->super->s_inodes_count -
 			 fs->super->s_free_inodes_count;
+	char *mdsdb;
 
-	if (!ctx->lfsck_oinfo) {
-		/* remove old db file */
-		if (unlink(ctx->lustre_mdsdb)) {
-			rc = errno;
-			if (rc != ENOENT) {
-				fprintf(stderr,"Error removing old db %s: %s\n",
-					ctx->lustre_mdsdb, strerror(rc));
-				ctx->flags |= E2F_FLAG_ABORT;
-				return rc;
-			}
-		}
+	assert(ctx->lustre_mds_files == 1);
+	mdsdb = ctx->lustre_mdsdb[0];
 
-		rc = ext2fs_get_mem(sizeof(struct lfsck_outdb_info),
-				    &ctx->lfsck_oinfo);
-		if (rc) {
-			ctx->lfsck_oinfo = NULL;
+	/* remove old db file */
+	if (unlink(mdsdb)) {
+		rc = errno;
+		if (rc != ENOENT) {
+			fprintf(stderr,"Error removing old db %s: %s\n",
+				mdsdb, strerror(rc));
 			ctx->flags |= E2F_FLAG_ABORT;
 			return rc;
 		}
-		memset(ctx->lfsck_oinfo, 0, sizeof(struct lfsck_outdb_info));
-		rc = ext2fs_get_mem(sizeof(struct lfsck_ofile_ctx)*LOV_MAX_OSTS,
-				    &ctx->lfsck_oinfo->ofile_ctx);
-		if (rc) {
-			ext2fs_free_mem(&ctx->lfsck_oinfo);
-			ctx->flags |= E2F_FLAG_ABORT;
+	}
+
+	rc = ext2fs_get_mem(sizeof(struct lfsck_outdb_info),
+			    &ctx->lfsck_oinfo);
+	if (rc) {
+		ctx->lfsck_oinfo = NULL;
+		ctx->flags |= E2F_FLAG_ABORT;
+		return rc;
+	}
+	memset(ctx->lfsck_oinfo, 0, sizeof(struct lfsck_outdb_info));
+	rc = ext2fs_get_mem(sizeof(struct lfsck_ofile_ctx)*LOV_MAX_OSTS,
+			    &ctx->lfsck_oinfo->ofile_ctx);
+	if (rc) {
+		ext2fs_free_mem(&ctx->lfsck_oinfo);
+		ctx->flags |= E2F_FLAG_ABORT;
+		return rc;
+	}
+
+	memset(ctx->lfsck_oinfo->ofile_ctx, 0,
+	       sizeof(struct lfsck_ofile_ctx) * LOV_MAX_OSTS);
+	if (lfsck_opendb(mdsdb, MDS_SIZEINFO,
+			 &ctx->lfsck_oinfo->mds_sizeinfo_dbp, 0,
+			 sizeof(__u64) + sizeof(struct lfsck_mds_szinfo), 
+			 numfiles, DB_HASH)) {
+		fprintf(stderr, "Failed to open db file %s\n",
+			MDS_SIZEINFO);
+		ctx->flags |= E2F_FLAG_ABORT;
+		return (EIO);
+	}
+	/* Directory-stripe database; opened with allow_dup set (4th argument). */
+	if (lfsck_opendb(mdsdb, MDS_MDTDB,
+			 &ctx->lfsck_oinfo->mds_dirstripe_dbp, 1,
+			 sizeof(struct lu_fid) + sizeof(struct lfsck_mds_stripe_ent),
+			 numfiles, DB_HASH)) {
+		fprintf(stderr, "Failed to open db file %s\n", MDS_MDTDB);
+		ctx->flags |= E2F_FLAG_ABORT;
+		return (EIO);
+	}
+
+	if (ctx->options & E2F_OPT_READONLY) {
+		e2fsck_get_lov_objids(ctx, ctx->lfsck_oinfo);
+		lfsck_write_mds_hdrinfo(ctx, ctx->lfsck_oinfo);
+	}
+
+	return rc;
+}
+				
+static int e2fsck_lfsck_save_lmv_ea(e2fsck_t ctx, ext2_ino_t ino, 
+				    __u32 generation, struct lmv_user_md *lmv)
+{
+	struct lmv_user_mds_data *lmi;
+	struct lu_fid master_fid;
+	int rc = 0, i;
+	DBT key, data;
+	DB *dbp;
+
+	assert(ctx->lustre_mds_files == 1);
+	if (!ctx->lfsck_oinfo) {
+		rc = e2fsck_lfsck_init_oinfo(ctx);
+		if (rc)
 			return rc;
+	}
+	lmi = lmv->lum_objects;
+	/* letocpu_lmv_user_md() already put the EA in CPU order: copy, don't swab */
+	master_fid = lmi[0].lum_fid;
+	for (i = 0; i < lmv->lum_stripe_count; i++, lmi++) {
+		__u32 mdt_idx = lmi->lum_mds;	/* unsigned: huge values fail the check */
+		struct lfsck_mds_stripe_ent mds_ent;
+		struct lu_fid fid = lmi->lum_fid;
+
+		if (mdt_idx >= LMV_MAX_MDTS) {
+			fprintf(stderr, "invalid MDT index %u ino %u[%d]\n",
+				mdt_idx, ino, i);
+			continue;
 		}
-		memset(ctx->lfsck_oinfo->ofile_ctx, 0,
-		       sizeof(struct lfsck_ofile_ctx) * LOV_MAX_OSTS);
-		if (lfsck_opendb(ctx->lustre_mdsdb, MDS_SIZEINFO,
-				 &ctx->lfsck_oinfo->mds_sizeinfo_dbp, 0,
-				 sizeof(mds_fid) + sizeof(szinfo), numfiles)) {
-			fprintf(stderr, "Failed to open db file %s\n",
-				MDS_SIZEINFO);
+
+		if (mdt_idx + 1 > ctx->lfsck_oinfo->mdt_count)
+			ctx->lfsck_oinfo->mdt_count = mdt_idx + 1;
+
+		/* Build the stored record directly in little-endian order. */
+		e2fsck_fid_cpu_to_le(&mds_ent.mds_mfid, &master_fid);
+		e2fsck_fid_cpu_to_le(&mds_ent.mds_fid, &fid);
+		mds_ent.mds_mdtidx = ext2fs_cpu_to_le32(mdt_idx);
+		memset(&key, 0, sizeof(key));
+		memset(&data, 0, sizeof(data));
+		key.data = &fid;
+		key.size = sizeof(struct lu_fid);
+		/* No cputole_mds_stripe_ent() here: it would swab mds_fid/mds_mdtidx twice. */
+		data.data = &mds_ent;
+		data.size = sizeof(mds_ent);
+		dbp = ctx->lfsck_oinfo->mds_dirstripe_dbp;
+		if ((rc = dbp->put(dbp, NULL, &key, &data, 0)) != 0) {
+			dbp->err(dbp, rc, "db->put failed\n");
+			e2fsck_lfsck_cleanupdb(ctx);
 			ctx->flags |= E2F_FLAG_ABORT;
+			/* XXX - Free lctx memory */
 			return (EIO);
 		}
+	}
+	return rc;
+}
 
-		if (ctx->options & E2F_OPT_READONLY) {
-			e2fsck_get_lov_objids(ctx, ctx->lfsck_oinfo);
-			lfsck_write_mds_hdrinfo(ctx, ctx->lfsck_oinfo);
-		}
+static int e2fsck_lfsck_save_ea(e2fsck_t ctx, ext2_ino_t ino, __u32 generation,
+				struct lov_user_md *lmm)
+{
+	ext2_filsys fs = ctx->fs;
+	struct lfsck_mds_szinfo szinfo;
+	struct lov_user_ost_data_v1 *loi;
+	__u64 mds_fid;
+	int rc, i;
+	DBT key, data;
+	DB *dbp;
+	__u32 numfiles = fs->super->s_inodes_count -
+			 fs->super->s_free_inodes_count;
+	char *mdsdb;
+
+	assert(ctx->lustre_mds_files == 1);
+	mdsdb = ctx->lustre_mdsdb[0];
+	if (!ctx->lfsck_oinfo) {
+		rc = e2fsck_lfsck_init_oinfo(ctx);
+		if (rc)
+			return rc;
 	}
 	if (lmm->lmm_magic == LOV_USER_MAGIC_V3)
 		loi = ((struct lov_user_md_v3 *)lmm)->lmm_objects;
@@ -278,7 +393,7 @@ static int e2fsck_lfsck_save_ea(e2fsck_t
 	/* XXX: We don't save the layout type here.  This doesn't matter for
 	 *      now, we don't really need the pool information for lfsck, but
 	 *      in the future we may need it for RAID-1 and other layouts. */
-	szinfo.mds_group = lmm->lmm_object_gr;
+	szinfo.mds_group = lmm->lmm_object_seq;
 	szinfo.mds_stripe_size = lmm->lmm_stripe_size;
 	szinfo.mds_stripe_start = loi->l_ost_idx;
 	szinfo.mds_stripe_count = lmm->lmm_stripe_count;
@@ -330,10 +445,9 @@ static int e2fsck_lfsck_save_ea(e2fsck_t
 			char dbname[256];
 			memset(dbname, 0, 256);
 			sprintf(dbname, "%s.%d", MDS_OSTDB, ost_idx);
-			rc = lfsck_opendb(ctx->lustre_mdsdb, dbname,
-					  &ofile_ctx->dbp, 1,
+			rc = lfsck_opendb(mdsdb, dbname, &ofile_ctx->dbp, 1,
 					  sizeof(objid) + sizeof(mds_ent),
-					  numfiles);
+					  numfiles, DB_HASH);
 			if (rc) {
 				e2fsck_lfsck_cleanupdb(ctx);
 				ctx->flags |= E2F_FLAG_ABORT;
@@ -384,13 +498,23 @@ int lfsck_check_lov_ea(e2fsck_t ctx, str
 			lmm->lmm_magic);
 		return(-EINVAL);
 	}
-
-	if (lmm->lmm_object_gr != 0 ) {
+#if 0
+	if (lmm->lmm_object_seq != 0 ) {
 		VERBOSE(ctx, "error: only handle group 0 not "LPU64"\n",
-			lmm->lmm_object_gr);
+			lmm->lmm_object_seq);
 		return(-EINVAL);
 	}
+#endif
+	return 0;
+}
 
+int lfsck_check_lmv_ea(e2fsck_t ctx, struct lmv_user_md *lmv)
+{
+	if (lmv->lum_magic != LMV_MAGIC_V1) {
+		VERBOSE(ctx, "error: wrong magic %08x , not %08x\n",
+			lmv->lum_magic, LMV_MAGIC_V1);
+		return(-EINVAL);
+	}
 	return 0;
 }
 
@@ -406,22 +530,34 @@ int e2fsck_lfsck_found_ea(e2fsck_t ctx, 
 	if ((ctx->lustre_devtype & LUSTRE_TYPE) != LUSTRE_MDS)
 		return 0;
 
-	if (!LINUX_S_ISREG(inode->i_mode))
+	if (!LINUX_S_ISREG(inode->i_mode) && !LINUX_S_ISDIR(inode->i_mode))
 		return 0;
+	
+	if (LINUX_S_ISREG(inode->i_mode)) {
+		if (entry->e_name_index == EXT3_XATTR_INDEX_TRUSTED &&
+		    !strncmp(entry->e_name,XATTR_LUSTRE_MDS_LOV_EA,entry->e_name_len)){
+			struct lov_user_md *lmm = value;
+			letocpu_lov_user_md(lmm);
 
-	if (entry->e_name_index == EXT3_XATTR_INDEX_TRUSTED &&
-	    !strncmp(entry->e_name,XATTR_LUSTRE_MDS_LOV_EA,entry->e_name_len)){
-		struct lov_user_md *lmm = value;
-		letocpu_lov_user_md(lmm);
+			if (lfsck_check_lov_ea(ctx, lmm)) {
+				ctx->flags |= E2F_FLAG_ABORT;
+				return -EINVAL;
+			}
 
-		if (lfsck_check_lov_ea(ctx, lmm)) {
-			ctx->flags |= E2F_FLAG_ABORT;
-			return -EINVAL;
+			return e2fsck_lfsck_save_ea(ctx, ino, inode->i_generation, lmm);
 		}
-
-		return e2fsck_lfsck_save_ea(ctx, ino, inode->i_generation, lmm);
+	} else {
+		if (entry->e_name_index == EXT3_XATTR_INDEX_TRUSTED &&
+		    !strncmp(entry->e_name, XATTR_LUSTRE_MDS_LMV_EA, entry->e_name_len)) {
+			struct lmv_user_md *lmv = value;
+			letocpu_lmv_user_md(lmv);
+			if (lfsck_check_lmv_ea(ctx, lmv)) {
+				ctx->flags |= E2F_FLAG_ABORT;
+				return -EINVAL;
+			}
+			return e2fsck_lfsck_save_lmv_ea(ctx, ino, inode->i_generation, lmv);		
+		}	
 	}
-
 	return 0;
 }
 
@@ -452,6 +588,11 @@ int e2fsck_lfsck_flush_ea(e2fsck_t ctx)
 		rc += dbp->close(dbp, 0);
 		ctx->lfsck_oinfo->mds_sizeinfo_dbp = NULL;
 	}
+	if (ctx->lfsck_oinfo->mds_dirstripe_dbp != NULL) {
+		dbp = ctx->lfsck_oinfo->mds_dirstripe_dbp;
+		rc += dbp->close(dbp, 0);
+		ctx->lfsck_oinfo->mds_dirstripe_dbp = NULL;
+	}
 
 	if (rc)
 		ctx->flags |= E2F_FLAG_ABORT;
@@ -459,6 +600,65 @@ int e2fsck_lfsck_flush_ea(e2fsck_t ctx)
 	return(rc);
 }
 
+/*
+ * Store the FID that follows a directory entry's name in the MDS_DFIDDB
+ * database, keyed by the entry's inode number; the database is opened on
+ * first use.  Returns 0 on success, EIO if the database cannot be opened,
+ * or DIRENT_ABORT if the record cannot be stored.
+ */
+int e2fsck_lfsck_put_fid(e2fsck_t ctx, struct ext2_dir_entry_2 *de)
+{
+	char *len = de->name + de->name_len + 1 /* NUL terminator */;
+	struct lu_fid fid;
+	DBT key, data;
+	DB *dbp;
+
+	assert(ctx->lustre_mds_files == 1);
+	assert(ctx->lfsck_oinfo != NULL);
+	if (ctx->lfsck_oinfo->mds_dirfid_dbp == NULL) {
+		__u32 numfiles = ctx->fs->super->s_inodes_count -
+				 ctx->fs->super->s_free_inodes_count;
+		if (lfsck_opendb(ctx->lustre_mdsdb[0], MDS_DFIDDB,
+				 &ctx->lfsck_oinfo->mds_dirfid_dbp, 0,
+				 sizeof(__u32) + sizeof(struct lu_fid),
+				 numfiles, DB_HASH)) {
+			fprintf(stderr, "Failed to open db file %s\n", MDS_DFIDDB);
+			ctx->flags |= E2F_FLAG_ABORT;
+			return (EIO);
+		}
+	}
+
+	/* Swab a private copy: the FID after the name may be unaligned, and
+	 * converting it in place would leave it byte-swapped inside the
+	 * caller's directory entry buffer. */
+	memcpy(&fid, len + 1, sizeof(fid));
+	e2fsck_fid_be_to_cpu(&fid, &fid);
+	e2fsck_fid_cpu_to_le(&fid, &fid);
+	memset(&key, 0, sizeof(key));
+	memset(&data, 0, sizeof(data));
+	key.data = &de->inode;
+	key.size = sizeof(de->inode);
+	data.data = &fid;
+	data.size = sizeof(fid);
+	dbp = ctx->lfsck_oinfo->mds_dirfid_dbp;
+	if (dbp->put(dbp, NULL, &key, &data, 0) != 0) {
+		fprintf(stderr, "Failure to put data into db\n");
+		ctx->flags |= E2F_FLAG_ABORT;
+		return(DIRENT_ABORT);
+	}
+	return 0;
+}
+
+int e2fsck_lfsck_flush_fid(e2fsck_t ctx)
+{
+	/* Close the directory-FID database if it is open; always returns 0. */
+	if (ctx->lfsck_oinfo == NULL || ctx->lfsck_oinfo->mds_dirfid_dbp == NULL)
+		return 0;
+	ctx->lfsck_oinfo->mds_dirfid_dbp->close(ctx->lfsck_oinfo->mds_dirfid_dbp, 0);
+	ctx->lfsck_oinfo->mds_dirfid_dbp = NULL;
+	return 0;
+}
+
 /* From debugfs.c for file removal */
 static int lfsck_release_blocks_proc(ext2_filsys fs, blk_t *blocknr,
 			       int blockcnt, void *private)
@@ -555,6 +755,7 @@ static int lfsck_list_objs(ext2_ino_t di
 	}
 
 	objent.ost_objid = objid;
+	objent.ost_group = lctx->mds_group;
 	objent.ost_flag = 0;
 	if (LINUX_S_ISREG(inode.i_mode))
 		objent.ost_size = EXT2_I_SIZE(&inode);
@@ -665,11 +866,15 @@ static int lfsck_iterate_obj_dirs(ext2_i
 }
 
 /* Get the starting point of where the objects reside */
-static int lfsck_get_object_dir(e2fsck_t ctx, char *block_buf,ext2_ino_t *inode)
+static int lfsck_get_object_dir(e2fsck_t ctx, char *block_buf, 
+				ext2_ino_t **inode)
 {
 	ext2_filsys fs = ctx->fs;
 	ext2_ino_t  tinode;
+	int i = 0;
 	int rc;
+	int count = ctx->lustre_mds_files;
+	ext2_ino_t *dir = *inode;
 
 	rc = ext2fs_lookup(fs, EXT2_ROOT_INO, OBJECT_DIR, strlen(OBJECT_DIR),
 			   block_buf, &tinode);
@@ -677,46 +882,57 @@ static int lfsck_get_object_dir(e2fsck_t
 		fprintf(stderr, "error looking up OST object parent dir\n");
 		return (ENOENT);
 	}
+
 	rc = ext2fs_check_directory(fs, tinode);
-	if (rc) {
+	if (rc)
 		return(ENOENT);
-	}
 
 	rc = ext2fs_lookup(fs, tinode, OBJECT_DIR_V1, strlen(OBJECT_DIR_V1),
-			   block_buf, inode);
+			   block_buf, dir);
 	if (rc) {
-		rc = ext2fs_lookup(fs, tinode, OBJECT_DIR_V2,
-				   strlen(OBJECT_DIR_V2), block_buf, inode);
-		if (rc) {
-			fprintf(stderr, "error looking up OST object subdir\n");
-			return (-ENOENT);
-		}
-	}
-	rc = ext2fs_check_directory(fs, *inode);
-	if (rc) {
-		return(-ENOENT);
+		for (i = 0; i < count; i++) { 
+			char object_name[5];	/* group number as a decimal string */
+			sprintf(object_name, "%d", i == 0 ? i : 
+					   i + FIRST_MDT_GROUP - 1);
+			rc = ext2fs_lookup(fs, tinode, object_name,
+					   strlen(object_name), block_buf, dir);
+			if (rc) {
+				fprintf(stderr, "error looking up OST object subdir %d \n", i);
+				break;	/* NOTE(review): falls through to return(0); slots from i on stay unwritten */
+			}
+			rc = ext2fs_check_directory(fs, *dir);
+			if (rc)
+				return(-ENOENT);
+			dir ++;
+		}
+	} else {
+		rc = ext2fs_check_directory(fs, *dir);
+		if (rc)
+			return(-ENOENT);
 	}
 	return(0);
 }
 
 /* What is the last object id for the OST */
-static int lfsck_get_last_id(e2fsck_t ctx, __u64 *last_id)
+static int lfsck_get_last_id(e2fsck_t ctx, __u64 *last_id, int index)
 {
 	ext2_filsys fs = ctx->fs;
-	ext2_ino_t  inode, tinode;
+	ext2_ino_t  inode[LMV_MAX_MDTS], tinode;
 	ext2_file_t  e2_file;
 	char *block_buf;
 	unsigned int got;
 	int rc;
+	ext2_ino_t *dir = &inode[0];
 
 	block_buf = e2fsck_allocate_memory(ctx, fs->blocksize * 3,
 					   "lookup buffer");
 
-	rc = lfsck_get_object_dir(ctx, block_buf, &inode);
+	rc = lfsck_get_object_dir(ctx, block_buf, &dir);
 	if (rc)
 		goto out;
 
-	rc = ext2fs_lookup(fs, inode, LAST_ID,
+	assert(index < ctx->lustre_mds_files);
+	rc = ext2fs_lookup(fs, inode[index], LAST_ID,
 			   strlen(LAST_ID), block_buf, &tinode);
 	if (rc)
 		goto out;
@@ -736,32 +952,31 @@ static int lfsck_get_last_id(e2fsck_t ct
 		ext2fs_file_close(e2_file);
 		goto out;
 	}
-
 	rc = ext2fs_file_close(e2_file);
-
 	*last_id = ext2fs_le64_to_cpu(*last_id);
 out:
 	ext2fs_free_mem(&block_buf);
 	return (rc);
 }
 
-int lfsck_set_last_id(e2fsck_t ctx,  __u64 last_id)
+int lfsck_set_last_id(e2fsck_t ctx,  __u64 last_id, int group)
 {
 	ext2_filsys fs = ctx->fs;
-	ext2_ino_t  inode, tinode;
+	ext2_ino_t  inode[LMV_MAX_MDTS], tinode;
 	ext2_file_t  e2_file;
 	char *block_buf;
 	unsigned int written;
 	int rc;
+	ext2_ino_t *dir = inode;
 
 	block_buf = e2fsck_allocate_memory(ctx, fs->blocksize * 3,
 					   "lookup buffer");
-
-	rc = lfsck_get_object_dir(ctx, block_buf, &inode);
+	rc = lfsck_get_object_dir(ctx, block_buf, &dir);
 	if (rc)
 		goto out;
 
-	rc = ext2fs_lookup(fs, inode, LAST_ID,
+	assert(group < ctx->lustre_mds_files);
+	rc = ext2fs_lookup(fs, inode[group], LAST_ID,
 			   strlen(LAST_ID), block_buf, &tinode);
 	if (rc)
 		goto out;
@@ -787,7 +1002,6 @@ int lfsck_set_last_id(e2fsck_t ctx,  __u
 	}
 
 	rc = ext2fs_file_close(e2_file);
-
 out:
 	ext2fs_free_mem(&block_buf);
 	return (rc);
@@ -836,9 +1050,12 @@ int e2fsck_get_last_rcvd_info(e2fsck_t c
 	if (local_uuid)
 		memcpy(local_uuid, &lsd->lsd_uuid, sizeof(lsd->lsd_uuid));
 
-	if (peer_uuid)
+	if (peer_uuid) {
 		memcpy(peer_uuid, &lsd->lsd_peeruuid,sizeof(lsd->lsd_peeruuid));
+		fprintf(stderr, "peeruuid is %s \n", peer_uuid->uuid);
+	}
 
+	
 	if (subdircount)
 		*subdircount = ext2fs_le16_to_cpu(lsd->lsd_subdir_count);
 
@@ -984,9 +1201,12 @@ int lfsck_create_objid(e2fsck_t ctx, __u
 	char name[32];
 	int len, dirlen;
 	__u32 compat, incompat, subdircount;
-	ext2_ino_t  inode, tinode, cinode;
+	ext2_ino_t  inode[LMV_MAX_MDTS], tinode, cinode;
 	struct ext2_inode ext2inode;
 	char *block_buf;
+	int count = ctx->lustre_mds_files;
+	ext2_ino_t *dir = inode;
+	int i;
 
 	block_buf = e2fsck_allocate_memory(ctx, ctx->fs->blocksize * 3,
 					   "lookup buffer");
@@ -1012,55 +1232,56 @@ int lfsck_create_objid(e2fsck_t ctx, __u
 		goto out;
 	}
 
-	if (lfsck_get_object_dir(ctx, block_buf, &inode)) {
+	if (lfsck_get_object_dir(ctx, block_buf, &dir)) {
 		rc = EINVAL;
 		goto out;
 	}
 
 	dirlen = sprintf(dirname, "d%u", (int)objid & (subdircount - 1));
 
-	rc = ext2fs_lookup(ctx->fs, inode, dirname,
-			   dirlen, block_buf, &tinode);
-	if (rc) {
-		rc = EINVAL;
-		goto out;
-	}
+	for (i = 0; i < count; i++) {
+		rc = ext2fs_lookup(ctx->fs, inode[i], dirname,
+				   dirlen, block_buf, &tinode);
+		if (rc) {
+			rc = EINVAL;
+			goto out;
+		}
 
-	if (ext2fs_namei(ctx->fs, EXT2_ROOT_INO, tinode, name, &cinode) == 0) {
-		fprintf(stderr, "Failure to create obj\n");
-		rc = EINVAL;
-		goto out;
-	}
+		if (ext2fs_namei(ctx->fs, EXT2_ROOT_INO, tinode, name, &cinode) == 0) {
+			fprintf(stderr, "Failure to create obj\n");
+			rc = EINVAL;
+			goto out;
+		}
 
-	rc = ext2fs_new_inode(ctx->fs, tinode, 010755, 0, &cinode);
-	if (rc) {
-		fprintf(stderr, "Failure to create obj\n");
-		rc = EINVAL;
-		goto out;
-	}
+		rc = ext2fs_new_inode(ctx->fs, tinode, 010755, 0, &cinode);
+		if (rc) {
+			fprintf(stderr, "Failure to create obj\n");
+			rc = EINVAL;
+			goto out;
+		}
 
-	rc = ext2fs_link(ctx->fs, tinode, name, cinode, EXT2_FT_REG_FILE);
-	if (rc) {
-		fprintf(stderr, "Failure to create obj\n");
-		rc = EINVAL;
-		goto out;
-	}
+		rc = ext2fs_link(ctx->fs, tinode, name, cinode, EXT2_FT_REG_FILE);
+		if (rc) {
+			fprintf(stderr, "Failure to create obj\n");
+			rc = EINVAL;
+			goto out;
+		}
 
-	if (ext2fs_test_inode_bitmap(ctx->fs->inode_map, cinode)) {
-		fprintf(stderr, "Warning: inode already set");
-	}
-	ext2fs_inode_alloc_stats2(ctx->fs, cinode, +1, 0);
-	memset(&ext2inode, 0, sizeof(ext2inode));
-	ext2inode.i_mode = LINUX_S_IFREG;
-	ext2inode.i_atime = ext2inode.i_ctime = ext2inode.i_mtime = time(NULL);
-	ext2inode.i_links_count = 1;
-	ext2inode.i_size = 0;
-	if (ext2fs_write_inode(ctx->fs, cinode, &ext2inode)) {
-		fprintf(stderr, "Failure to create obj\n");
-		rc = EINVAL;
-		goto out;
+		if (ext2fs_test_inode_bitmap(ctx->fs->inode_map, cinode)) {
+			fprintf(stderr, "Warning: inode already set");
+		}
+		ext2fs_inode_alloc_stats2(ctx->fs, cinode, +1, 0);
+		memset(&ext2inode, 0, sizeof(ext2inode));
+		ext2inode.i_mode = LINUX_S_IFREG;
+		ext2inode.i_atime = ext2inode.i_ctime = ext2inode.i_mtime = time(NULL);
+		ext2inode.i_links_count = 1;
+		ext2inode.i_size = 0;
+		if (ext2fs_write_inode(ctx->fs, cinode, &ext2inode)) {
+			fprintf(stderr, "Failure to create obj\n");
+			rc = EINVAL;
+			goto out;
+		}
 	}
-
 out:
 	ext2fs_free_mem((void *)&(block_buf));
 	return (rc);
@@ -1074,13 +1295,14 @@ void e2fsck_pass6_ost(e2fsck_t ctx)
 	ext2_filsys fs = ctx->fs;
 	struct lfsck_ost_ctx lctx;
 	struct lfsck_ost_hdr ost_hdr;
-	struct lfsck_mds_hdr mds_hdr;
+	struct lfsck_mds_hdr mds_hdr[LMV_MAX_MDTS];
 	struct lfsck_ost_objent objent;
 	DB *outdb = NULL;
-	DB *mds_hdrdb = NULL;
+	DB *mds_hdrdb[LMV_MAX_MDTS] = {NULL};
 	DB *osthdr = NULL;
 	DBT key, data;
-	ext2_ino_t dir;
+	ext2_ino_t dir[LMV_MAX_MDTS];
+	ext2_ino_t *dirp = &dir[0];
 	__u32 compat, rocompat, incompat;
 	int i, rc;
 	char *block_buf = NULL;
@@ -1096,38 +1318,39 @@ void e2fsck_pass6_ost(e2fsck_t ctx)
 
 	block_buf = e2fsck_allocate_memory(ctx, fs->blocksize * 3,
 					   "block iterate buffer");
+	for (i = 0; i < ctx->lustre_mds_files; i ++) {		
+		rc = lfsck_opendb(ctx->lustre_mdsdb[i], MDS_HDR, &mds_hdrdb[i], 0, 0, 0, 
+				  DB_HASH);
+		if (rc != 0) {
+			fprintf(stderr, "failure to open database %s: %s\n",
+				MDS_HDR, db_strerror(rc));
+			ctx->flags |= E2F_FLAG_ABORT;
+			goto out;
+		}
 
-	rc = lfsck_opendb(ctx->lustre_mdsdb, MDS_HDR, &mds_hdrdb, 0, 0, 0);
-	if (rc != 0) {
-		fprintf(stderr, "failure to open database %s: %s\n",
-			MDS_HDR, db_strerror(rc));
-		ctx->flags |= E2F_FLAG_ABORT;
-		goto out;
-	}
+		memset(&key, 0, sizeof(key));
+		memset(&data, 0, sizeof(data));
+		mds_hdr[i].mds_magic = MDS_MAGIC;
+		key.data = &mds_hdr[i].mds_magic;
+		key.size = sizeof(mds_hdr[i].mds_magic);
+		data.data = &mds_hdr[i];
+		data.size = sizeof(struct lfsck_mds_hdr);
+		data.ulen = sizeof(struct lfsck_mds_hdr);
+		data.flags = DB_DBT_USERMEM;
+		rc = mds_hdrdb[i]->get(mds_hdrdb[i], NULL, &key, &data, 0);
+		if (rc) {
+			fprintf(stderr,"error getting mds_hdr ("LPU64":%u) in %s: %s\n",
+				mds_hdr[i].mds_magic, (int)sizeof(mds_hdr[i].mds_magic),
+				ctx->lustre_mdsdb[i], db_strerror(rc));
+			ctx->flags |= E2F_FLAG_ABORT;
+			goto out;
+		}
 
-	memset(&key, 0, sizeof(key));
-	memset(&data, 0, sizeof(data));
-	mds_hdr.mds_magic = MDS_MAGIC;
-	key.data = &mds_hdr.mds_magic;
-	key.size = sizeof(mds_hdr.mds_magic);
-	data.data = &mds_hdr;
-	data.size = sizeof(mds_hdr);
-	data.ulen = sizeof(mds_hdr);
-	data.flags = DB_DBT_USERMEM;
-	rc = mds_hdrdb->get(mds_hdrdb, NULL, &key, &data, 0);
-	if (rc) {
-		fprintf(stderr,"error getting mds_hdr ("LPU64":%u) in %s: %s\n",
-			mds_hdr.mds_magic, (int)sizeof(mds_hdr.mds_magic),
-			ctx->lustre_mdsdb, db_strerror(rc));
-		ctx->flags |= E2F_FLAG_ABORT;
-		goto out;
+		assert(data.size == sizeof(struct lfsck_mds_hdr));
+		memcpy(&mds_hdr[i], data.data, sizeof(struct lfsck_mds_hdr));
+		letocpu_mds_hdr(&mds_hdr[i]);
 	}
-
-	assert(data.size == sizeof(mds_hdr));
-	memcpy(&mds_hdr, data.data, sizeof(mds_hdr));
-	letocpu_mds_hdr(&mds_hdr);
-
-	rc = lfsck_opendb(ctx->lustre_ostdb, OST_HDR, &osthdr, 0, 0, 0);
+	rc = lfsck_opendb(ctx->lustre_ostdb, OST_HDR, &osthdr, 0, 0, 0, DB_HASH);
 	if (rc != 0) {
 		fprintf(stderr, "failure to open database %s: %s\n",
 			OST_HDR, db_strerror(rc));
@@ -1135,17 +1358,6 @@ void e2fsck_pass6_ost(e2fsck_t ctx)
 		goto out;
 	}
 
-	rc = lfsck_opendb(ctx->lustre_ostdb, OST_OSTDB, &outdb, 0,
-			  sizeof(objent.ost_objid) + sizeof(objent),
-			  fs->super->s_inodes_count -
-			  fs->super->s_free_inodes_count);
-	if (rc != 0) {
-		fprintf(stderr, "error getting ost_hdr in %s: %s\n",
-			ctx->lustre_ostdb, db_strerror(rc));
-		ctx->flags |= E2F_FLAG_ABORT;
-		goto out;
-	}
-
 	if (e2fsck_get_last_rcvd_info(ctx, &ost_hdr.ost_uuid,
 				      &ost_hdr.ost_mds_uuid, NULL,
 				      &ost_hdr.ost_index,
@@ -1169,7 +1381,7 @@ void e2fsck_pass6_ost(e2fsck_t ctx)
 	 * Get /O/R or /O/0 directory
 	 * for each entry scan all the dirents and get the object id
 	 */
-	if (lfsck_get_object_dir(ctx, block_buf, &dir)) {
+	if (lfsck_get_object_dir(ctx, block_buf, &dirp)) {
 		ctx->flags |= E2F_FLAG_ABORT;
 		goto out;
 	}
@@ -1178,56 +1390,79 @@ void e2fsck_pass6_ost(e2fsck_t ctx)
 	 * Okay so we have the containing directory so let's iterate over the
 	 * containing d* dirs and then iterate again inside
 	 */
-	lctx.ctx = ctx;
-	lctx.outdb = outdb;
-	lctx.status = 0;
-	lctx.numfiles = 0;
-	lctx.max_objid = 0;
-	lctx.status = ext2fs_dir_iterate2(fs, dir, 0, block_buf,
-					  lfsck_iterate_obj_dirs, &lctx);
-	if (lctx.status) {
-		fprintf(stderr, "Failure in iterating object dirs\n");
-		ctx->flags |= E2F_FLAG_ABORT;
-		return;
-	}
-
-	ost_hdr.ost_magic = OST_MAGIC;
-	ost_hdr.ost_flags = ctx->options & E2F_OPT_READONLY;
-	ost_hdr.ost_num_files = lctx.numfiles;
-	VERBOSE(ctx, "OST: num files = %u\n", lctx.numfiles);
-
-	if (lfsck_get_last_id(ctx, &ost_hdr.ost_last_id)) {
-		fprintf(stderr, "Failure to get last id for objects\n");
-		ctx->flags |= E2F_FLAG_ABORT;
-		goto out;
-	}
-	VERBOSE(ctx, "OST: last_id = "LPU64"\n", ost_hdr.ost_last_id);
+	for (i = 0; i < ctx->lustre_mds_files; i++) {
+		char dbname[256];
+		sprintf(dbname, "%s.%d", OST_OSTDB, i);
+		rc = lfsck_opendb(ctx->lustre_ostdb, dbname, &outdb, 0,
+				  sizeof(objent.ost_objid) + sizeof(objent),
+				  fs->super->s_inodes_count -
+				  fs->super->s_free_inodes_count, DB_HASH);
+		if (rc != 0) {
+			fprintf(stderr, "error getting ost_hdr in %s: %s\n",
+				ctx->lustre_ostdb, db_strerror(rc));
+			ctx->flags |= E2F_FLAG_ABORT;
+			goto out;
+		}
 
-	/* Update the last_id value on the OST if necessary/possible to the
-	 * MDS value if larger.  Otherwise we risk creating duplicate objects.
-	 * If running read-only, we skip this so new objects are ignored. */
-	ost_hdr.ost_last_id = lctx.max_objid;
-	if (!(ctx->options & E2F_OPT_READONLY) &&
-	    !(mds_hdr.mds_flags & E2F_OPT_READONLY)) {
-		for (i = 0; i < mds_hdr.mds_num_osts; i++) {
-			if (strcmp((char *)mds_hdr.mds_ost_info[i].uuid,
-				   (char *)ost_hdr.ost_uuid.uuid) == 0 &&
-			    mds_hdr.mds_max_ost_id[i] >= ost_hdr.ost_last_id)
-				ost_hdr.ost_last_id=mds_hdr.mds_max_ost_id[i]+1;
+		lctx.ctx = ctx;
+		lctx.outdb = outdb;
+		lctx.status = 0;
+		lctx.numfiles = 0;
+		lctx.max_objid = 0;
+		lctx.mds_group = i == 0 ? i : i + FIRST_MDT_GROUP - 1; 
+		lctx.status = ext2fs_dir_iterate2(fs, dir[i], 0, block_buf,
+						  lfsck_iterate_obj_dirs, &lctx);
+		if (lctx.status) {
+			fprintf(stderr, "Failure in iterating object dirs\n");
+			ctx->flags |= E2F_FLAG_ABORT;
+			return;
 		}
+		VERBOSE(ctx, "OST group %d : num files = %u\n", i, lctx.numfiles);
 
-	        if (lfsck_set_last_id(ctx, ost_hdr.ost_last_id)) {
-		        fprintf(stderr, "Failure to set last id\n");
-		        ctx->flags |= E2F_FLAG_ABORT;
-		        goto out;
-	        }
+		if (lfsck_get_last_id(ctx, &ost_hdr.ost_last_id[i], i)) {
+			fprintf(stderr, "Failure to get last id for objects\n");
+			ctx->flags |= E2F_FLAG_ABORT;
+			goto out;
+		}
+		VERBOSE(ctx, "OST group %d : last_id = "LPU64"\n", i, ost_hdr.ost_last_id[i]);
 
+		/* Update the last_id value on the OST if necessary/possible to the
+		 * MDS value if larger.  Otherwise we risk creating duplicate objects.
+		 * If running read-only, we skip this so new objects are ignored. */
+		ost_hdr.ost_last_id[i] = lctx.max_objid;
+		if (!(ctx->options & E2F_OPT_READONLY)){
+			int index, k;
+			for (index = 0; index < ctx->lustre_mds_files; index++)
+				if (i == mds_hdr[index].mds_index) 
+					break;
+		    	assert(index != ctx->lustre_mds_files);
+			if (!(mds_hdr[index].mds_flags & E2F_OPT_READONLY)) {
+				for (k = 0; k < mds_hdr[index].mds_num_osts; k++) {
+					if (strcmp((char *)mds_hdr[index].mds_ost_info[k].uuid,
+						   (char *)ost_hdr.ost_uuid.uuid) == 0 &&
+					    	    mds_hdr[index].mds_max_ost_id[k] >= ost_hdr.ost_last_id[i])
+						ost_hdr.ost_last_id[i]=mds_hdr[index].mds_max_ost_id[k]+1;
+				}
+				if (lfsck_set_last_id(ctx, ost_hdr.ost_last_id[i], i)) {
+					fprintf(stderr, "Failure to set last id\n");
+					ctx->flags |= E2F_FLAG_ABORT;
+					goto out;
+				}
 #ifdef LOG_REMOVAL
-		if (lfsck_remove_ost_logs(ctx, block_buf))
-			ctx->flags |= E2F_FLAG_ABORT;
+				if (lfsck_remove_ost_logs(ctx, block_buf))
+					ctx->flags |= E2F_FLAG_ABORT;
 #endif
+			}
+		}
+		ost_hdr.ost_mds_num ++;
+		outdb->close(outdb, 0);
+		outdb = NULL;
 	}
 
+	ost_hdr.ost_magic = OST_MAGIC;
+	ost_hdr.ost_flags = ctx->options & E2F_OPT_READONLY;
+	ost_hdr.ost_num_files = lctx.numfiles;
+
 	memset(&key, 0, sizeof(key));
 	memset(&data, 0, sizeof(data));
 	key.data = &ost_hdr.ost_magic;
@@ -1242,8 +1477,9 @@ void e2fsck_pass6_ost(e2fsck_t ctx)
 	}
 
 out:
-	if (mds_hdrdb)
-		mds_hdrdb->close(mds_hdrdb, 0);
+	for (i = 0; i < ctx->lustre_mds_files; i ++)
+		if (mds_hdrdb[i])
+			mds_hdrdb[i]->close(mds_hdrdb[i], 0);
 	if (outdb)
 		outdb->close(outdb, 0);
 	if (osthdr)
@@ -1287,6 +1523,662 @@ int lfsck_remove_mds_logs(e2fsck_t ctx)
 	return (rc);
 }
 
+/**
+ * On-disk format:
+ * iam mostly tries to reuse existing htree formats.
+ * 
+ * Format of index node:
+ *
+ * +-----+-------+-------+-------+------+-------+------------+
+ * |     | count |       |       |      |       |            |
+ * | gap |   /   | entry | entry | .... | entry | free space |
+ * |     | limit |       |       |      |       |            |
+ * +-----+-------+-------+-------+------+-------+------------+
+ *
+ *       gap           this part of node is never accessed by iam code. It
+ *                     exists for binary compatibility with ldiskfs htree (that,
+ *                     in turn, stores fake struct ext2_dirent for ext2
+ *                     compatibility), and to keep some unspecified per-node
+ *                     data. Gap can be different for root and non-root index
+ *                     nodes. Gap size can be specified for each container
+ *                     (gap of 0 is allowed).
+ *
+ *       count/limit   current number of entries in this node, and the maximal
+ *                     number of entries that can fit into node. count/limit
+ *                     has the same size as entry, and is itself counted in
+ *                     count.
+ *
+ *       entry         index entry: consists of a key immediately followed by
+ *                     a pointer to a child node. Size of a key and size of a
+ *                     pointer depends on container. Entry has neither
+ *                     alignment nor padding.
+ *
+ *       free space    portion of node new entries are added to
+ *
+ * Entries in index node are sorted by their key value.
+ **/
+struct iam_entry;
+struct iam_lentry;
+ 
+struct iam_frame 
+{
+        void *data;			/* buffer holding the node read for this level */
+        struct iam_entry *entries;	/* first entry slot inside data */
+        struct iam_entry *at;		/* entry currently selected at this level */
+	__u32		  curidx; 	/* only ever set to 0 (root frame) in the code shown here */
+};
+
+struct iam_root {
+        __u64  ir_magic;		/* compared against IAM_ROOT_MAGIC */
+        __u16  ir_keysize;		/* bytes per key */
+        __u16  ir_recsize;		/* bytes per leaf record */
+        __u16  ir_ptrsize;		/* bytes per child pointer in an index entry */
+        __u8   ir_indirect_levels;	/* index levels to descend below the root */
+        __u8   ir_padding;
+};
+
+struct iam_countlimit {	/* header read from the first entry slot of an index node */
+        __u16 limit;		/* not read by the code shown here */
+        __u16 count;		/* entries in use, as returned by e2fs_iam_get_count() */
+};
+
+
+
+static inline struct iam_entry 
+*e2fs_iam_get_entries(struct iam_frame *frame, int level)
+{	/* level 0 (root): entries start after struct iam_root; otherwise at the start of the block */
+	return (level == 0 ? (frame->data + sizeof(struct iam_root)) :
+			      frame->data);
+}
+
+static inline unsigned e2fs_iam_get_count(struct iam_entry *entries)
+{	/* the first entry slot is read as a struct iam_countlimit header */
+        return ((struct iam_countlimit *) entries)->count;
+}
+
+static inline int e2fs_iam_entry_size(struct iam_root *root)
+{	/* bytes per index entry: a key followed by a child pointer */
+	return (root->ir_keysize + root->ir_ptrsize);
+}
+
+static inline struct iam_entry *e2fs_iam_entry_shift(struct iam_entry *entry,
+                                                     int shift,
+						     struct iam_root *root)
+{	/* move "shift" index entries forward (or backward when negative) */
+        void *e = entry;
+        return e + shift * e2fs_iam_entry_size(root);
+}               
+
+static inline unsigned long e2fs_iam_entry_diff(struct iam_root *root,
+                                      		struct iam_entry *e1,
+                                       		struct iam_entry *e2)
+{	/* distance from e2 up to e1, expressed in index entries */
+	unsigned long diff;
+        diff = (void *)e1 - (void *)e2;
+        return diff / e2fs_iam_entry_size(root);
+}       
+
+static struct iam_entry * 
+e2fs_iam_find_position(struct iam_frame *frame, __u64 key, 
+		       struct iam_root *root)
+{	/* Binary search of frame->entries for key; see the return note below. */
+        unsigned count;
+        struct iam_entry *p;
+        struct iam_entry *q;
+        struct iam_entry *m;
+	/* The search window is slots 2 .. count-1 of the node. */
+        count = e2fs_iam_get_count(frame->entries);
+        p = e2fs_iam_entry_shift(frame->entries, 2, root);
+        q = e2fs_iam_entry_shift(frame->entries, count - 1, root);
+        while (p <= q) {
+                m = e2fs_iam_entry_shift(p, e2fs_iam_entry_diff(root, q, p) / 2, root);
+                if (memcmp(m, &key, root->ir_keysize) > 0)	/* NOTE(review): compares against key's in-memory bytes — confirm byte order */
+                        q = e2fs_iam_entry_shift(m, -1, root);
+                else
+                        p = e2fs_iam_entry_shift(m, +1, root);
+        }
+        return e2fs_iam_entry_shift(p, -1, root);	/* slot just before the first key that compared greater */
+} 
+
+static inline unsigned e2fs_iam_get_block(struct iam_root *root, 
+					  struct iam_entry *entry)
+{	/* Child block pointer stored right after the key: read the whole 32-bit value, not one byte. NOTE(review): assumes a 4-byte host-order pointer — confirm against ir_ptrsize. */
+	return *(__u32 *)((char *)entry + root->ir_keysize);
+}
+
+#define  IAM_LEAF_HEADER_MAGIC 0x1976 	/* expected value of ill_magic */
+struct iam_leaf_head { 		/* header at the start of a leaf block */
+        __u16 ill_magic;		/* checked by e2fs_iam_check_leaf() */
+        __u16 ill_count;		/* used as the number of leaf entries to visit */
+};
+
+static int e2fs_iam_check_leaf(char *leaf)
+{	/* Returns 0 when the block starts with the leaf magic, -EINVAL otherwise. */
+        struct iam_leaf_head *hdr;
+	/* only the magic is inspected; ill_count is not validated here */
+	hdr = (struct iam_leaf_head*)leaf;
+        if (hdr->ill_magic != IAM_LEAF_HEADER_MAGIC)
+		return -EINVAL;
+	return 0;
+}
+
+static struct iam_lentry *e2fs_iam_lentries(void *leaf)
+{	/* the first leaf entry follows the struct iam_leaf_head header */
+	return (void *)leaf + sizeof(struct iam_leaf_head);
+}
+
+static inline int e2fs_iam_lentry_size(const struct iam_root *root)
+{	/* bytes per leaf entry: a key followed by its record */
+        return root->ir_keysize + root->ir_recsize;
+}
+
+static struct iam_lentry 
+*e2fs_iam_lentry_shift(struct iam_lentry *entry, int shift, 
+		       struct iam_root *root)
+{	/* move "shift" leaf entries forward (or backward when negative) */
+	return (void *)entry + shift * e2fs_iam_lentry_size(root);
+}
+
+static void
+*e2fs_iam_lentry_rec(struct iam_root *root, struct iam_lentry *entry)
+{	/* the record starts immediately after the ir_keysize-byte key */
+	return (char *)entry + root->ir_keysize;
+}
+
+static inline int 
+e2fs_iam_lentry_diff(struct iam_root *root,
+                     struct iam_lentry *e1,
+                     struct iam_lentry *e2)
+{	/* distance from e2 up to e1, expressed in leaf entries */
+        int diff;
+        int esize;
+	/* byte distance divided by the per-entry size */
+        esize = e2fs_iam_lentry_size(root);
+        diff = (void *)e1 - (void *)e2;
+        return diff / esize;
+}
+
+#define IAM_ROOT_MAGIC 0xbedabb1edULL 
+static int e2fs_iam_find_leaf(ext2_filsys fs, ext2_ino_t inode,
+			      ext2_file_t file, __u64 key,
+			      struct iam_frame *frames,
+			      struct iam_frame **frame,
+			      struct iam_root *root)
+{	/* Descend from frames[0] (the root) through the index levels towards key. */
+	int rc = 0;
+	unsigned int got;
+	int i = 0;
+	__u32 ptr = 0;
+	struct iam_frame *p;
+	/* On return *frame is the deepest frame reached; rc is 0 or the read error. */
+	p = &frames[0];
+	while (1) {
+		p->at = e2fs_iam_find_position(p, key, root);
+		ptr = e2fs_iam_get_block(root, p->at);
+		if (++i > root->ir_indirect_levels)
+			break;
+		p++;
+		ext2fs_file_llseek(file, (__u64)ptr * fs->blocksize, EXT2_SEEK_SET, NULL);
+		rc = ext2fs_file_read(file, p->data, fs->blocksize, &got);
+		if (rc != 0)
+			break;
+		p->entries = e2fs_iam_get_entries(p, 1);	/* a freshly read index block needs its entry array before it is searched */
+	}
+	*frame = p;
+	return rc;
+}
+
+typedef int (*iam_entry_cb_t)(struct iam_root *root, struct iam_lentry *entry,
+			      void *data);	/* per-leaf-entry callback; non-zero return stops the leaf scan */
+
+static int fldb_fill_cb(struct iam_root *root, struct iam_lentry *entry,
+			void *cb_data)
+{	/* iam_entry_cb_t: store one leaf entry in the DB passed as cb_data. */
+	struct lfsck_mds_fldb *range;
+	int rc = 0;	
+	DB *fldb = (DB *)cb_data;
+	seqno_t seq;
+	DBT key, data;
+
+	range = (struct lfsck_mds_fldb *)e2fs_iam_lentry_rec(root, entry);
+	seq = *(seqno_t*)entry;
+	/* convert from big-endian; the record fields are rewritten in place */
+	seq = ext2fs_be64_to_cpu(seq);
+	range->lsr_start = ext2fs_be64_to_cpu(range->lsr_start);
+	range->lsr_end = ext2fs_be64_to_cpu(range->lsr_end);
+	range->lsr_index = ext2fs_be32_to_cpu(range->lsr_index);
+	range->lsr_flags = ext2fs_be64_to_cpu(range->lsr_flags);
+
+        memset(&key, 0, sizeof(key));
+        memset(&data, 0, sizeof(data));
+        key.data = &seq;
+        key.size = sizeof(seq);
+        data.data = range;
+        data.size = sizeof(*range);
+	/* key: converted sequence number, value: converted range record */
+	if ((rc = fldb->put(fldb, NULL, &key, &data, 0)) != 0) {
+		fldb->err(fldb, rc, "db->put failed\n");
+		return (EIO);
+	}
+
+	return rc;
+}
+
+static int oi_fill_cb(struct iam_root *root, struct iam_lentry *entry,
+		      void *cb_data)
+{	/* iam_entry_cb_t: store one leaf entry in the DB passed as cb_data. */
+	struct osd_inode_id *id;
+	struct lu_fid *fid;
+	DB *oidb = (DB *)cb_data;
+	DBT key, data;
+	int rc;
+
+	id = (struct osd_inode_id *)e2fs_iam_lentry_rec(root, entry);
+	fid = (struct lu_fid *)entry;
+	/* convert from big-endian in place, inside the leaf buffer */
+	id->oii_ino = ext2fs_be32_to_cpu(id->oii_ino);
+	id->oii_gen = ext2fs_be32_to_cpu(id->oii_gen);
+
+	e2fsck_fid_be_to_cpu(fid, fid);
+        memset(&key, 0, sizeof(key));
+        memset(&data, 0, sizeof(data));
+
+        key.data = fid;
+        key.size = sizeof(struct lu_fid);
+        data.data = id;
+        data.size = sizeof(struct osd_inode_id);
+	/* key: converted FID, value: converted inode number/generation pair */
+	if ((rc = oidb->put(oidb, NULL, &key, &data, 0)) != 0) {
+		oidb->err(oidb, rc, "db->put failed\n");
+		return (EIO);
+	}
+
+	return 0;
+}
+
+static int e2fs_iam_fill_db(ext2_filsys fs, struct iam_frame *frame, 
+			    struct iam_frame *frames, ext2_file_t file,
+			    struct iam_root *root, DB *fldb, 
+			    iam_entry_cb_t callback)
+{	/* Read the leaf block that frame->at points to and hand every entry to
+	 * callback(root, entry, fldb).  Returns the number of entries handled,
+	 * or a negative value on failure. */
+	struct iam_leaf_head *ilh;
+	struct iam_lentry *p;
+	int rc;
+	unsigned int got;
+	int count;
+	int i;
+	__u32 ptr;
+
+	ptr = e2fs_iam_get_block(root, frame->at);
+	ext2fs_file_llseek(file, (__u64)ptr * fs->blocksize, EXT2_SEEK_SET, NULL);
+	frame ++;	/* the leaf is read into the frame after the last index level */
+	assert(frame == frames + root->ir_indirect_levels + 1);
+	rc = ext2fs_file_read(file, frame->data, fs->blocksize, &got);
+	if (rc != 0)
+		return -EIO;	/* a positive value would be taken for an entry count */
+
+	rc = e2fs_iam_check_leaf(frame->data);
+	if (rc)
+		return rc;
+
+	ilh = (struct iam_leaf_head *)frame->data;
+	count = ilh->ill_count;
+	p = e2fs_iam_lentries(frame->data);
+	for (i = 0; i < count; i++) {
+		rc = callback(root, p, (void*)fldb);
+		if (rc)		/* report the failure instead of overwriting it with count */
+			return rc > 0 ? -rc : rc;
+		p = e2fs_iam_lentry_shift(p, 1, root);
+	}
+	return count;
+}
+
+/* Step to the next leaf (after ext4_htree_next_block): returns 1 if there is one, 0 at the end. */
+static int e2fs_iam_next_block(ext2_filsys fs, 
+			       struct iam_root *root,
+			       ext2_file_t file,
+			       struct iam_frame *frame,
+			       struct iam_frame *frames)
+{
+	struct iam_frame *p;
+	struct iam_entry *end;
+	int err = 0, num_frames = 0;
+
+	p = frame;
+	/*
+	 * Find the next leaf page by incrementing the frame pointer.
+	 * If we run out of entries in the interior node, loop around and
+	 * increment pointer in the parent node.  When we break out of
+	 * this loop, num_frames indicates the number of interior
+	 * nodes need to be read.
+	 */
+	while (1) {
+		p->at = e2fs_iam_entry_shift(p->at, 1, root);
+		end = e2fs_iam_entry_shift(p->entries, e2fs_iam_get_count(p->entries), root);
+		if (p->at < end)
+			break;
+		if (p == frames)
+			return 0;
+		num_frames++;
+		p--;
+	}
+	/* re-read the interior nodes below the level that still had entries */
+	while (num_frames--) {
+		__u32 ptr;
+		unsigned int got;
+		/* NOTE(review): a failed read (err) is not reported to the caller */
+		ptr = e2fs_iam_get_block(root, p->at);
+		ext2fs_file_llseek(file, (__u64)ptr * fs->blocksize, EXT2_SEEK_SET, NULL);
+		p++;
+		err = ext2fs_file_read(file, p->data, fs->blocksize, &got);
+		p->at = p->entries = e2fs_iam_get_entries(p, p != frames);	/* pass the frame: the helper dereferences frame->data */
+	}
+	return 1;
+}
+
+#define DX_MAX_TREE_HEIGHT 5
+/* Walk every leaf of the IAM file stored in inode, passing each entry to
+ * callback(root, entry, fldb).  Returns 0 on success, non-zero on failure. */
+static int e2fs_iam_iterate(e2fsck_t ctx, ext2_ino_t inode, DB *fldb, 
+			    iam_entry_cb_t callback)
+{
+	ext2_filsys fs = ctx->fs;
+	ext2_file_t e2_file;
+	struct iam_frame frames[DX_MAX_TREE_HEIGHT + 1] = {{0}};
+	struct iam_frame *frame;
+	struct iam_root *root;
+	int i;
+	int rc;
+	int close_rc;
+	unsigned int got;
+	/* one block-sized buffer per frame: index levels plus the leaf */
+	for (i = 0; i < DX_MAX_TREE_HEIGHT + 1; i++) {
+		frames[i].data = e2fsck_allocate_memory(ctx, fs->blocksize,
+							"iam iterate buffer");
+		if (frames[i].data == NULL) {
+			rc = ENOMEM;
+			goto fail;
+		}
+	}
+
+	/* Open the IAM file, load and check the root */
+	rc = ext2fs_file_open(fs, inode, 0, &e2_file);
+	if (rc)
+		goto fail;
+
+	rc = ext2fs_file_read(e2_file, frames[0].data, fs->blocksize, &got);
+	if (rc)
+		goto close;
+
+	if (got != fs->blocksize) {
+		rc = EIO;
+		goto close;
+	}
+	root = (struct iam_root *)frames[0].data;
+	/* reject a bad magic and trees deeper than frames[] can hold */
+	if (root->ir_magic != IAM_ROOT_MAGIC ||
+	    root->ir_indirect_levels >= DX_MAX_TREE_HEIGHT) {
+		rc = EINVAL;
+		goto close;
+	}
+	frames[0].curidx = 0;
+	frames[0].entries = e2fs_iam_get_entries(&frames[0], 0);
+	/* Find the first leaf */
+	rc = e2fs_iam_find_leaf(fs, inode, e2_file, 0, frames, &frame, root);
+	if (rc)
+		goto close;
+
+	/* feed each leaf to the callback, then step to the next one */
+	while (1) {
+		rc = e2fs_iam_fill_db(fs, frame, frames, e2_file, root, fldb, 
+				      callback);
+		if (rc < 0)
+			break;
+		rc = e2fs_iam_next_block(fs, root, e2_file, frame, frames);
+		if (rc == 0)
+			break;
+	}
+close:
+	close_rc = ext2fs_file_close(e2_file);	/* must not hide an earlier error */
+	if (rc == 0)
+		rc = close_rc;
+fail:
+	for (i = 0; i < DX_MAX_TREE_HEIGHT + 1; i++)
+		if (frames[i].data)
+			ext2fs_free_mem((void *)(&frames[i].data));
+	return rc;
+}
+
+#define FLD   "fld"
+static int lfsck_get_fldb(e2fsck_t ctx)
+{	/* Load the "fld" IAM file into MDS_FLDB. Returns 0, -EINVAL (db open), -ENOENT (no "fld") or -EIO. */
+	ext2_filsys fs = ctx->fs;
+	ext2_ino_t  fld_inode;
+	int rc = 0;
+	DB *fldb = NULL;
+	char *mdsdb;
+
+	assert(ctx->lustre_mds_files == 1);
+	mdsdb = ctx->lustre_mdsdb[0];
+	if (lfsck_opendb(mdsdb, MDS_FLDB, &fldb, 1,
+			 sizeof(seqno_t) + sizeof(struct lfsck_mds_fldb),
+			 0, DB_BTREE)) {
+		fprintf(stderr, "failure to open database %s \n", MDS_FLDB);
+		rc = -EINVAL;
+		goto out;
+	}
+	rc = ext2fs_lookup(fs, EXT2_ROOT_INO, FLD, strlen(FLD), NULL,
+			   &fld_inode);
+	if (rc) {
+		ctx->flags |= E2F_FLAG_ABORT;
+		rc = -ENOENT;
+		goto out;	/* the database opened above must still be closed */
+	}
+	if (e2fs_iam_iterate(ctx, fld_inode, fldb, fldb_fill_cb)) {
+		ctx->flags |= E2F_FLAG_ABORT;
+		rc = -EIO;
+	}
+out:
+	if (fldb)
+		fldb->close(fldb, 0);
+	return rc;	
+}
+
+#define OID   "oi.16"
+static int lfsck_get_oidb(e2fsck_t ctx)
+{	/* Load the "oi.16" IAM file into MDS_OIDB. Returns 0, -EINVAL (db open), -ENOENT (no "oi.16") or -EIO. */
+	ext2_filsys fs = ctx->fs;
+	ext2_ino_t  oi_inode;
+	int rc = 0;
+	DB *oidb = NULL;
+	char *mdsdb;
+	__u32 numfiles = fs->super->s_inodes_count -
+			 fs->super->s_free_inodes_count;
+
+	assert(ctx->lustre_mds_files == 1);
+	mdsdb = ctx->lustre_mdsdb[0];
+	if (lfsck_opendb(mdsdb, MDS_OIDB, &oidb, 1,
+			 sizeof(seqno_t) + sizeof(struct lu_fid), numfiles,
+			 DB_HASH)) {
+		fprintf(stderr, "failure to open database %s \n", MDS_OIDB);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	rc = ext2fs_lookup(fs, EXT2_ROOT_INO, OID, strlen(OID), NULL,
+			   &oi_inode);
+	if (rc) {
+		ctx->flags |= E2F_FLAG_ABORT;
+		rc = -ENOENT;
+		goto out;	/* the database opened above must still be closed */
+	}
+	if (e2fs_iam_iterate(ctx, oi_inode, oidb, oi_fill_cb)) {
+		ctx->flags |= E2F_FLAG_ABORT;
+		rc = -EIO;
+	}
+out:
+	if (oidb)
+		oidb->close(oidb, 0);
+	return rc;	
+}
+
+static int lfsck_mdt_save_slave(ext2_ino_t dir, int idx,
+				struct ext2_dir_entry *dirent, int offset,
+			  	int blocksize, char *buf, void *priv_data)
+{	/* Directory-iteration callback: for an entry whose inode carries the
+	 * LMV xattr, store a lfsck_mds_stripe_ent in lctx->outdb keyed by the
+	 * FID read from behind the entry name.  Always returns 0. */
+	struct ext2_dir_entry_2 *dirent2 = (struct ext2_dir_entry_2 *)dirent;
+	struct lfsck_mds_ctx  *lctx = priv_data;
+	e2fsck_t ctx = lctx->ctx; 
+	ext2_filsys fs = ctx->fs;
+	struct ext2_super_block *sb = fs->super;
+	struct ext2_inode_large *inode;
+	int inode_size = EXT2_INODE_SIZE(sb);
+	DB *dbp = lctx->outdb;
+	struct ext2_ext_attr_entry *entry;
+	char *len = dirent2->name + dirent2->name_len + 1 /* NUL terminator */;
+	struct lu_fid *fid;
+	__u32 *eamagic;
+	char *start, *end;
+	unsigned int storage_size, remain;
+	int rc = 0;
+	int min, max;
+
+	if (inode_size == EXT2_GOOD_OLD_INODE_SIZE)
+		/* this isn't large inode. so, nothing to check */
+		return 0;
+
+   	if ((((dirent2->name_len & 0xFF) == 1) && (dirent2->name[0] == '.')) ||
+	    (((dirent2->name_len & 0xFF) == 2) && (dirent2->name[0] == '.') &&
+	     (dirent2->name[1] == '.')))
+		return 0;
+
+	fid = (struct lu_fid *)(len + 1);
+
+	e2fsck_fid_be_to_cpu(fid, fid); 
+	
+	inode = (struct ext2_inode_large *)
+		     e2fsck_allocate_memory(lctx->ctx, inode_size, "scratch slave");
+	if (ext2fs_read_inode_full(fs, dirent->inode, (struct ext2_inode *)inode, 
+				   inode_size)) {
+		fprintf(stderr, "read inode failed for %s "DFID"\n", 
+			dirent2->name, PFID(fid));
+		goto out;
+	}
+
+	/* i_extra_isize must cover i_extra_isize + i_pad1 at least */
+	min = sizeof(inode->i_extra_isize) + sizeof(inode->i_pad1);
+	max = inode_size - EXT2_GOOD_OLD_INODE_SIZE;
+
+	/*
+	 * For now we will allow i_extra_isize to be 0, but really
+	 * implementations should never allow i_extra_isize to be 0
+	 */
+	if (inode->i_extra_isize &&
+	    (inode->i_extra_isize < min || inode->i_extra_isize > max))
+		goto out;
+
+	eamagic = IHDR(inode);
+	if (*eamagic != EXT2_EXT_ATTR_MAGIC) {
+		fprintf(stderr, "no EA for %s "DFID" %u eamagic %x %d \n", dirent2->name, 
+			PFID(fid), dirent2->inode, *eamagic, inode->i_extra_isize);
+		goto out;
+	}
+	storage_size = inode_size - EXT2_GOOD_OLD_INODE_SIZE -
+		       inode->i_extra_isize;
+	start = ((char *) inode) + EXT2_GOOD_OLD_INODE_SIZE +
+		inode->i_extra_isize + sizeof(__u32);
+	end = (char *) inode + EXT2_INODE_SIZE(ctx->fs->super);
+	entry = (struct ext2_ext_attr_entry *) start;
+	remain = storage_size - sizeof(__u32);
+	while (!EXT2_EXT_IS_LAST_ENTRY(entry)) {
+		remain -= sizeof(struct ext2_ext_attr_entry);
+		
+		if (entry->e_name_index == EXT3_XATTR_INDEX_TRUSTED &&
+		    !strncmp(entry->e_name, XATTR_LUSTRE_MDS_LMV_EA, entry->e_name_len)) {
+			struct lmv_user_md *lmv = (struct lmv_user_md *)(start + 
+							    entry->e_value_offs);
+			struct lfsck_mds_stripe_ent stripe_ent;
+			DBT key, data;
+
+			letocpu_lmv_user_md(lmv);
+			if (lfsck_check_lmv_ea(ctx, lmv)) {
+				fprintf(stderr, "lmv check failed  for %s "DFID"\n", 
+				        dirent2->name, PFID(fid));
+				goto next;	/* a bare "continue" would retry this same entry forever */
+			}
+
+			e2fsck_fid_cpu_to_le(&stripe_ent.mds_fid, fid);
+			stripe_ent.mds_mdtidx =
+				ext2fs_cpu_to_le32(lmv->lum_objects[0].lum_mds);
+			e2fsck_fid_cpu_to_le(&stripe_ent.mds_mfid,
+					     &lmv->lum_objects[0].lum_fid);
+			memset(&key, 0, sizeof(key));
+			memset(&data, 0, sizeof(data));
+			key.data = fid;	/* key on the FID itself, not on the address of the local pointer */
+			key.size = sizeof(*fid);
+			data.data = &stripe_ent;
+			data.size = sizeof(stripe_ent);
+			if ((rc = dbp->put(dbp, NULL, &key, &data, 0)) != 0)
+				dbp->err(dbp, rc, "db->put failed\n");
+		}
+next:
+		/* If EA value is stored in external inode then it does not
+		 * consume space here */
+		if (entry->e_value_inum == 0)
+			remain -= entry->e_value_size;
+
+		entry = EXT2_EXT_ATTR_NEXT(entry);
+	}
+out:		
+	ext2fs_free_mem(&inode);
+	return 0;
+}
+
+#define SLAVE_DIR "OBJ"	
+static int lfsck_get_slaves(e2fsck_t ctx)
+{	/* Run lfsck_mdt_save_slave over the "OBJ" directory, filling MDS_OBJDB. Returns 0, -EINVAL, -ENOENT or -EIO. */
+	ext2_filsys fs = ctx->fs;
+	ext2_ino_t  dir;
+	int rc = 0;
+	DB *objdb = NULL;
+	char *mdsdb;
+	struct lfsck_mds_ctx lctx;
+	__u32 numfiles = fs->super->s_inodes_count -
+			 fs->super->s_free_inodes_count;
+
+	assert(ctx->lustre_mds_files == 1);
+	mdsdb = ctx->lustre_mdsdb[0];
+	if (lfsck_opendb(mdsdb, MDS_OBJDB, &objdb, 1,
+			 sizeof(struct lfsck_mds_stripe_ent) + 
+			 sizeof(struct lu_fid), numfiles,
+			 DB_HASH)) {
+		fprintf(stderr, "failure to open database %s \n", MDS_OBJDB);
+		rc = -EINVAL;
+		goto out;
+	}
+	rc = ext2fs_lookup(fs, EXT2_ROOT_INO, SLAVE_DIR, strlen(SLAVE_DIR), 
+			   NULL, &dir);
+	if (rc) {
+		rc = -ENOENT;
+		goto out;	/* the database opened above must still be closed */
+	}
+
+	lctx.outdb = objdb;
+	lctx.ctx = ctx;
+	rc = ext2fs_dir_iterate2(fs, dir , 0, NULL, lfsck_mdt_save_slave, &lctx);
+	if (rc) {
+		ctx->flags |= E2F_FLAG_ABORT;
+		rc = -EIO;
+	}
+out:
+	if (objdb)
+		objdb->close(objdb, 0);
+	return rc;
+}
 
 /*
  * On the mds save the fid and directory information for each file.
@@ -1301,19 +2193,22 @@ void e2fsck_pass6_mds(e2fsck_t ctx)
 	struct lfsck_mds_hdr mds_hdr;
 	DBT key, data;
 	DB *outdb = NULL, *dbhdr = NULL;
-	__u32 compat, rocompat, incompat, index;
+	__u32 compat, rocompat, incompat;
 	int rc, i;
+	char *mdsdb;	/* path of the one MDS database used by this pass */
 
 	clear_problem_context(&pctx);
 
 	lctx.ctx = ctx;
 
+	assert(ctx->lustre_mds_files == 1);	/* only slot 0 is used below */
+	mdsdb = ctx->lustre_mdsdb[0];
 	/* Found no files with EA on filesystem - empty */
 	if (ctx->lfsck_oinfo == NULL) {
-		if (unlink(ctx->lustre_mdsdb)) {
+		if (unlink(mdsdb)) {
 			if (errno != ENOENT) {
 				fprintf(stderr, "Failure to remove old "
-					"db file %s\n", ctx->lustre_mdsdb);
+					"db file %s\n", mdsdb);
 				ctx->flags |= E2F_FLAG_ABORT;
 				goto out;
 			}
@@ -1338,19 +2233,38 @@ void e2fsck_pass6_mds(e2fsck_t ctx)
 		       sizeof(struct lfsck_ofile_ctx) * LOV_MAX_OSTS);
 	}
 
-	if (!(ctx->options & E2F_OPT_READONLY)) {
+	if (!(ctx->options & E2F_OPT_READONLY))
 		 lfsck_write_mds_hdrinfo(ctx, ctx->lfsck_oinfo);
+
+	rc = lfsck_get_fldb(ctx);	/* ENOENT, in either sign, is not fatal */
+	if (rc && rc != ENOENT && rc != -ENOENT) {
+		fprintf(stderr, "failure to get fldb \n");
+		ctx->flags |= E2F_FLAG_ABORT;
+		goto out;
 	}
 
-	if (lfsck_opendb(ctx->lustre_mdsdb, MDS_DIRINFO, &outdb, 1,
+	rc = lfsck_get_oidb(ctx);	/* -ENOENT is tolerated here too */
+	if (rc && rc != -ENOENT) {
+		fprintf(stderr, "failure to get oi.16 \n");
+		ctx->flags |= E2F_FLAG_ABORT;
+		goto out;
+	}
+
+	rc = lfsck_get_slaves(ctx);	/* -ENOENT is tolerated here too */
+	if (rc && rc != -ENOENT) {
+		fprintf(stderr, "failure to get slaves \n");
+		ctx->flags |= E2F_FLAG_ABORT;
+		goto out;
+	}
+
+	if (lfsck_opendb(mdsdb, MDS_DIRINFO, &outdb, 1,
 			 sizeof(mds_dirent.mds_fid) + sizeof(mds_dirent),
 			 fs->super->s_inodes_count -
-			 fs->super->s_free_inodes_count)) {
+			 fs->super->s_free_inodes_count, DB_HASH)) {
 		fprintf(stderr, "failure to open database %s\n", MDS_DIRINFO);
 		ctx->flags |= E2F_FLAG_ABORT;
 		goto out;
 	}
-
 	lctx.outdb = outdb;
 	lctx.numfiles = 0;
 	lctx.dot = EXT2_ROOT_INO;
@@ -1381,14 +2295,15 @@ void e2fsck_pass6_mds(e2fsck_t ctx)
 	}
 
 	if (e2fsck_get_last_rcvd_info(ctx, &mds_hdr.mds_uuid, NULL, NULL,
-				      &index, &compat, &rocompat, &incompat)) {
+				      &mds_hdr.mds_index, &compat, &rocompat, &incompat)) {
 		fprintf(stderr, "Failure to read MDS last_rcvd file\n");
 		ctx->flags |= E2F_FLAG_ABORT;
 		goto out;
 	}
 
 	VERBOSE(ctx, "MDS: '%s' mdt idx %u: compat %#x rocomp %#x incomp %#x\n",
-		(char *)&mds_hdr.mds_uuid.uuid, index,compat,rocompat,incompat);
+		(char *)&mds_hdr.mds_uuid.uuid, mds_hdr.mds_index,compat,rocompat,
+		incompat);	/* mds_hdr.mds_index replaces the former local 'index' */
 
 	if (compat & OBD_COMPAT_OST || incompat & OBD_INCOMPAT_OST) {
 		fprintf(stderr, "Found OST last_rcvd file doing MDS check\n");
@@ -1409,7 +2324,7 @@ void e2fsck_pass6_mds(e2fsck_t ctx)
 #endif
 	}
 
-	rc = lfsck_opendb(ctx->lustre_mdsdb, MDS_HDR, &dbhdr, 0, 0, 0);
+	rc = lfsck_opendb(mdsdb, MDS_HDR, &dbhdr, 0, 0, 0, DB_HASH);	/* DB_HASH: new trailing argument, as in the other lfsck_opendb() calls of this patch */
 	if (rc != 0) {
 		fprintf(stderr, "failure to open database %s: %s\n", MDS_HDR,
 			db_strerror(rc));
diff -up old/e2fsprogs-1.41.12.2.ora1/e2fsck/unix.c ./e2fsprogs-1.41.12.2.ora1/e2fsck/unix.c
--- old/e2fsprogs-1.41.12.2.ora1/e2fsck/unix.c	2010-10-08 03:48:33.000000000 -0700
+++ ./e2fsprogs-1.41.12.2.ora1/e2fsck/unix.c	2011-01-09 22:57:21.074566687 -0800
@@ -396,8 +396,11 @@ static void check_if_skip(e2fsck_t ctx)
 	fputc('\n', stdout);
 	ext2fs_close(fs);
 	ctx->fs = NULL;
-	if (ctx->lustre_mdsdb)
-		free(ctx->lustre_mdsdb);
+	if (ctx->lustre_mds_files > 0) {	/* lustre_mdsdb is an array: test the count, not its address */
+		int i;
+		for (i = 0; i < ctx->lustre_mds_files; i++)
+			free(ctx->lustre_mdsdb[i]);
+	}
 	if (ctx->lustre_ostdb)
 		free(ctx->lustre_ostdb);
 	if (ctx->lfsck_oinfo)
@@ -832,35 +835,62 @@ static errcode_t PRS(int argc, char *arg
 				long_options, &option_index)) != EOF)
 		switch (c) {
 		case 1: {
-			char *dbpath, *tmp;
+			char *dbpath, *p, *mdt_path;
+			char tmp[PATH_MAX];
 
 			if (!optarg)
 	                        usage(ctx);
 
-			dbpath = malloc(PATH_MAX);
-			if (dbpath == NULL) {
-				fprintf(stderr, "Out of memory\n");
-				exit(1);
-			}
-			tmp = malloc(PATH_MAX);
-			if (tmp == NULL) {
-				fprintf(stderr, "Out of memory\n");
-				exit(1);
-			}
-
-			strcpy(tmp, optarg);
-			if (realpath(my_dirname(tmp), dbpath) == NULL) {
-				fprintf(stderr, "Failure to resolve path %s\n",
-					optarg);
-				exit(1);
-			}
-
-			strcpy(tmp, optarg);
-			sprintf(dbpath+strlen(dbpath), "/%s", my_basename(tmp));
-			ctx->lustre_mdsdb = dbpath;
-			ctx->lustre_devtype |= LUSTRE_MDS;
-
-			free(tmp);
+			/* The option value is a comma-separated list of MDS
+			 * database files.  Each entry has its own directory
+			 * canonicalized with realpath() and is stored in the
+			 * next free ctx->lustre_mdsdb[] slot. */
+			p = optarg;
+			while ((mdt_path = strsep(&p, ",")) != NULL) {
+				size_t len;
+				int n;
+
+				/* lustre_mdsdb[] has LMV_MAX_MDTS slots */
+				if (ctx->lustre_mds_files >= LMV_MAX_MDTS) {
+					fprintf(stderr, "Too many MDS database "
+						"files (max %d)\n", LMV_MAX_MDTS);
+					exit(1);
+				}
+				/* tmp[] is PATH_MAX bytes */
+				if (strlen(mdt_path) >= sizeof(tmp)) {
+					fprintf(stderr, "Path too long: %s\n",
+						mdt_path);
+					exit(1);
+				}
+				fprintf(stdout, "MDSDB[%d]: %s\n",
+					ctx->lustre_mds_files, mdt_path);
+
+				dbpath = malloc(PATH_MAX);
+				if (dbpath == NULL) {
+					fprintf(stderr, "Out of memory\n");
+					exit(1);
+				}
+				/* strsep() NUL-terminated optarg in place, so
+				 * the current entry must be used here, not
+				 * optarg (which is always the first entry). */
+				strcpy(tmp, mdt_path);
+				if (realpath(my_dirname(tmp), dbpath) == NULL) {
+					fprintf(stderr, "Failure to resolve path %s\n",
+						mdt_path);
+					exit(1);
+				}
+
+				strcpy(tmp, mdt_path);
+				len = strlen(dbpath);
+				n = snprintf(dbpath + len, PATH_MAX - len, "/%s",
+					     my_basename(tmp));
+				if (n < 0 || (size_t)n >= PATH_MAX - len) {
+					fprintf(stderr, "Path too long: %s\n",
+						mdt_path);
+					exit(1);
+				}
+				ctx->lustre_mdsdb[ctx->lustre_mds_files] = dbpath;
+				ctx->lustre_mds_files++;
+				ctx->lustre_devtype |= LUSTRE_MDS;
+			}
 			break;
 		}
 		case 2: {
@@ -1823,8 +1828,11 @@ no_journal:
 	free(ctx->journal_name);
 	if (ctx->lfsck_oinfo)
 		e2fsck_lfsck_cleanupdb(ctx);
-	if (ctx->lustre_mdsdb)
-		free(ctx->lustre_mdsdb);
+	if (ctx->lustre_mdsdb) {
+		int i;
+		for (i = 0; i < ctx->lustre_mds_files; i++)
+			free(ctx->lustre_mdsdb[i]);
+	}
 	if (ctx->lustre_ostdb)
 		free(ctx->lustre_ostdb);