commit c3091cc1047d691fb7803422728bfcb69d58f474
Author: Takamura Tatsushi <takamr.tatsushi@jp.fujitsu.com>
Date:   Thu Nov 1 17:07:01 2018 +0900

    issue#1 DL-SNAP: Directory Level Snapshot
    
    DL-SNAP is a feature designed for directory level file backups.
    
    It is implemented on top of Lustre ldiskfs without modifying the ext4 disk format,
    although a dedicated flag is set in the feature field of the superblock
    so that we can check whether DL-SNAP is enabled.
    
    Because of this flag, the e2fsck command fails on such a file system,
    even though the on-disk ext2 file system structure is not modified.
    
    DL-SNAP uses a COW (Copy On Write) mechanism to reduce backup time and storage usage.
    When we create a snapshot, only inodes are created. At this point,
    new data blocks are not allocated on OST.
    Data blocks of snapshot are allocated when original file is modified.
    
    Not only root users but also ordinary users can create snapshots.
    Users can create a snapshot of a directory using lfs command with snapshot option,
    and restore files by usual process such as cp command.

diff --git a/ldiskfs/Makefile.in b/ldiskfs/Makefile.in
index bc3f058..a5d95f8 100644
--- a/ldiskfs/Makefile.in
+++ b/ldiskfs/Makefile.in
@@ -18,9 +18,11 @@ ext3_new_headers := ext3_extents.h
 ext4_new_sources := fiemap.h mmp.c
 ext4_new_sources += htree_lock.c
 ext4_new_headers :=
+snapshot_new_sources := snapshot.c snapshot.h snapshot_debug.h
 
 new_sources := $(ext4_new_sources)
 new_headers := $(ext4_new_headers)
+new_sources += $(snapshot_new_sources)
 
 ldiskfs_patched_sources := $(notdir $(backfs_sources) $(backfs_headers)) $(new_sources) $(new_headers)
 ldiskfs_sources := $(ldiskfs_patched_sources)
diff --git a/ldiskfs/kernel_patches/patches/rhel6.5/dl_snapshot.patch b/ldiskfs/kernel_patches/patches/rhel6.5/dl_snapshot.patch
new file mode 100644
index 0000000..1b6d786
--- /dev/null
+++ b/ldiskfs/kernel_patches/patches/rhel6.5/dl_snapshot.patch
@@ -0,0 +1,3494 @@
+diff -urN -x .svn linux-stage.org/fs/ext4/ext4.h linux-stage/fs/ext4/ext4.h
+--- linux-stage.org/fs/ext4/ext4.h	2018-08-31 20:53:57.000000000 +0900
++++ linux-stage/fs/ext4/ext4.h	2018-09-03 14:15:26.000000000 +0900
+@@ -326,6 +326,9 @@
+ #define EXT4_EXTENTS_FL			0x00080000 /* Inode uses extents */
+ #define EXT4_EA_INODE_FL		0x00200000 /* Inode used for large EA */
+ #define EXT4_EOFBLOCKS_FL		0x00400000 /* Blocks allocated beyond EOF */
++#define EXT4_SNAPSHOT_SHARE_FL	0x01000000 /* snapshot data share */
++#define EXT4_SNAPSHOT_SP_FL		0x04000000 /* snapshot flag */
++#define EXT4_SNAPSHOT_FL		0x08000000 /* snapshot file/dir */
+ #define EXT4_RESERVED_FL		0x80000000 /* reserved for ext4 lib */
+ 
+ #define EXT4_FL_USER_VISIBLE		0x004BDFFF /* User visible flags */
+@@ -382,6 +385,9 @@
+ 	EXT4_INODE_EXTENTS	= 19,	/* Inode uses extents */
+ 	EXT4_INODE_EA_INODE	= 21,	/* Inode used for large EA */
+ 	EXT4_INODE_EOFBLOCKS	= 22,	/* Blocks allocated beyond EOF */
++	EXT4_INODE_SNAPSHOT_SHARE	= 24,	/* snapshot data share */
++	EXT4_INODE_SNAPSHOT_SP	= 26,	/* snapshot flag */
++	EXT4_INODE_SNAPSHOT	= 27,	/* snapshot file/dir */
+ 	EXT4_INODE_RESERVED	= 31,	/* reserved for ext4 lib */
+ };
+ 
+@@ -1342,6 +1348,7 @@
+ #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK	0x0020
+ #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE	0x0040
+ #define EXT4_FEATURE_RO_COMPAT_QUOTA		0x0100
++#define EXT4_FEATURE_RO_COMPAT_SNAPSHOT		0x40000000
+ 
+ #define EXT4_FEATURE_INCOMPAT_COMPRESSION	0x0001
+ #define EXT4_FEATURE_INCOMPAT_FILETYPE		0x0002
+@@ -1375,7 +1382,9 @@
+ 					 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
+ 					 EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
+ 					 EXT4_FEATURE_RO_COMPAT_HUGE_FILE| \
+-					 EXT4_FEATURE_RO_COMPAT_QUOTA)
++					 EXT4_FEATURE_RO_COMPAT_QUOTA| \
++					 EXT4_FEATURE_RO_COMPAT_SNAPSHOT)
++
+ 
+ /*
+  * Default values for user and/or group using reserved blocks
+@@ -1772,6 +1781,8 @@
+        struct super_block *sb;  /* super block of the fs */
+ };
+ 
++struct ext4_snapshot_gen_lock;
++
+ /*
+  * Check interval multiplier
+  * The MMP block is written every update interval and initially checked every
+@@ -1956,6 +1967,8 @@
+ extern int flush_aio_dio_completed_IO(struct inode *inode);
+ extern void ext4_da_update_reserve_space(struct inode *inode,
+ 					int used, int quota_claim);
++extern int ext4_snapshot_orphan_truncate(struct inode *inode,
++			    struct ext4_snapshot_gen_lock **lock);
+ /* ioctl.c */
+ extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
+ extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
+@@ -2317,6 +2330,16 @@
+ /* mmp.c */
+ extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
+ 
++/* snapshot.c */
++extern int ext4_snapshot_get_enable(struct super_block *sb);
++extern int ext4_snapshot_set_enable(struct super_block *sb);
++extern int ext4_snapshot_clone(struct inode *snap, struct inode *orig);
++extern int ext4_snapshot_destroy(struct inode *inode, void *orig_fid);
++extern int ext4_snapshot_get_orphan(struct inode *inode, void *fid_buf,
++				       int *array_num);
++extern int ext4_snapshot_get_old_list(struct inode *inode, void *buf);
++extern int ext4_snapshot_list_orphan(struct super_block *sb, void *buf);
++extern int ext4_snapshot_set_del_flag(struct inode *inode);
+ /*
+  * Add new method to test wether block and inode bitmaps are properly
+  * initialized. With uninit_bg reading the block from disk is not enough
+diff -urN -x .svn linux-stage.org/fs/ext4/ext4_extents.h linux-stage/fs/ext4/ext4_extents.h
+--- linux-stage.org/fs/ext4/ext4_extents.h	2018-08-31 20:53:57.000000000 +0900
++++ linux-stage/fs/ext4/ext4_extents.h	2018-09-03 14:15:27.000000000 +0900
+@@ -135,6 +135,11 @@
+ #define EXT_BREAK      1
+ #define EXT_REPEAT     2
+ 
++#define WRITE_PREP_OP 0
++#define WRITE_COMMIT_OP 1
++#define READ_OP 2
++#define WRITE_COPY_OP 3
++
+ /*
+  * structure for external API
+  */
+diff -urN -x .svn linux-stage.org/fs/ext4/extents.c linux-stage/fs/ext4/extents.c
+--- linux-stage.org/fs/ext4/extents.c	2018-08-31 20:53:57.000000000 +0900
++++ linux-stage/fs/ext4/extents.c	2018-10-24 14:01:03.000000000 +0900
+@@ -42,7 +42,8 @@
+ #include <asm/uaccess.h>
+ #include <linux/fiemap.h>
+ #include "ext4_jbd2.h"
+-#include "ext4_extents.h"
++#include "mballoc.h"
++#include "snapshot.h"
+ 
+ /*
+  * used by extent splitting.
+@@ -337,6 +338,12 @@
+ 
+ 	if (len == 0)
+ 		return 0;
++
++	/* sparse extent for snapshot file */
++	if (EXT4_TEST_OST_SNAPSHOT_FILE(inode) &&
++	    EXT4_SNAPSHOT_TEST_SPARSE_EXTENT(ext))
++		return 1;
++
+ 	return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
+ }
+ 
+@@ -1296,7 +1303,10 @@
+ 	}
+ 
+ 	*logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
+-	*phys = ext4_ext_pblock(ex) + ee_len - 1;
++	if (!(EXT4_SNAPSHOT_TEST_SPARSE_EXTENT(ex) &&
++	      EXT4_TEST_OST_SNAPSHOT_FILE(inode)))
++		*phys = ext4_ext_pblock(ex) + ee_len - 1;
++
+ 	return 0;
+ }
+ 
+@@ -1585,6 +1595,10 @@
+ 		return 0;
+ #endif
+ 
++	if (EXT4_TEST_OST_SNAPSHOT_FILE(inode) &&
++	    EXT4_SNAPSHOT_TEST_SPARSE_EXTENT(ex1) &&
++	    EXT4_SNAPSHOT_TEST_SPARSE_EXTENT(ex2))
++		return 1;
+ 	if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
+ 		return 1;
+ 	return 0;
+@@ -1889,6 +1903,10 @@
+ 	return err;
+ }
+ 
++static int ext4_snapshot_read(struct inode *inode,
++				 struct ext4_ext_cache *cbex,
++				 int *exists, int *flags);
++
+ static int ext4_fill_fiemap_extents(struct inode *inode,
+ 				    ext4_lblk_t block, ext4_lblk_t num,
+ 				    struct fiemap_extent_info *fieinfo)
+@@ -1972,6 +1990,16 @@
+ 			cbex.ec_block = start;
+ 			cbex.ec_len = end - start;
+ 			cbex.ec_start = 0;
++			if (EXT4_TEST_OST_SNAPSHOT_FILE(inode)) {
++				up_read(&EXT4_I(inode)->i_data_sem);
++				/* search for extents from new snapshots */
++				err = ext4_snapshot_read(inode, &cbex,
++							    &exists, &flags);
++				if (err < 0)
++					break;
++				down_read(&EXT4_I(inode)->i_data_sem);
++			}
++
+ 		} else {
+ 			cbex.ec_block = le32_to_cpu(ex->ee_block);
+ 			cbex.ec_len = ext4_ext_get_actual_len(ex);
+@@ -2331,6 +2359,17 @@
+ 	unsigned short ee_len =  ext4_ext_get_actual_len(ex);
+ 	int i, metadata = 0, flags =0;
+ 
++	if (EXT4_TEST_OST_SNAPSHOT_FILE(inode) &&
++	    EXT4_SNAPSHOT_TEST_SPARSE_EXTENT(ex)) {
++		CDEBUG(D_INODE, "sparse extent [%u ->%u] "
++		       "held by snapshot(=%lu),"
++		       " it has no block\n",
++		       le32_to_cpu(ex->ee_block),
++		       le32_to_cpu(ex->ee_block) + ee_len - 1,
++		       inode->i_ino);
++		return 0;
++	}
++
+ 	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+ 		metadata = 1;
+ 		flags = EXT4_FREE_BLOCKS_METADATA;
+@@ -4487,6 +4526,1212 @@
+ 	return (error < 0 ? error : 0);
+ }
+ 
++/*
++ * ext4_snapshot_wait_writeback
++ *
++ * Wait for writeback of cached pages covering [offset, offset + len).
++ *
++ * \param[in]	inode		inode whose page cache is scanned
++ * \param[in]	offset		start of the range, file offset in bytes
++ * \param[in]	len		length of the range in bytes
++ *
++ * \retval	none(void)
++ */
++static void ext4_snapshot_wait_writeback(struct inode *inode,
++	loff_t offset, ssize_t len)
++{
++	struct page	*page;
++
++	while (len > 0) {
++		int poff = offset & (PAGE_CACHE_SIZE - 1);
++		int plen = PAGE_CACHE_SIZE - poff;
++
++		/* find_lock_page() wants a page index, not a byte offset */
++		page = find_lock_page(inode->i_mapping,
++				      offset >> PAGE_CACHE_SHIFT);
++		if (page) {
++			wait_on_page_writeback(page);
++			unlock_page(page);
++			page_cache_release(page);
++		}
++		/* advance to the start of the next page */
++		offset += plen;
++		len -= plen;
++	}
++}
++
++/* Search the newer snapshots linked from \a inode for an extent covering
++ * cbex->ec_block; on a hit set cbex->ec_start and *exists, and trim ec_len. */
++static int ext4_snapshot_read(struct inode *inode,
++				 struct ext4_ext_cache *cbex,
++				 int *exists, int *flags)
++{
++	struct ext4_ext_path	*path = NULL, *p = NULL;
++	struct inode *new_inode, *cur;
++	struct ext4_snapshot_link link, next_link;
++	struct ext4_snapshot_link *cur_link_buf = NULL,
++		*next_link_buf = NULL, *backup = NULL;
++	struct ext4_extent *ex;
++	ext4_lblk_t	next, end;
++	int depth = 0, err;
++
++	err = ext4_snapshot_get_link(inode, &link);
++	if (err) {
++		SNAPSHOT_CONSOLE_ERR(err);
++		return err;
++	}
++	/* nothing to search when no newer snapshot is linked */
++	if (!link.new_ino)
++		return 0;
++
++	cur_link_buf = &link;
++	next_link_buf = &next_link;
++
++	/* get snapshot new link data & new inode from xattr */
++	new_inode = ext4_snapshot_read_link(inode, cur_link_buf,
++					       next_link_buf, true, &err);
++	if (err) {
++		SNAPSHOT_CONSOLE_ERR(err);
++		return err;
++	}
++	cur = new_inode;
++	while (cur != NULL) {
++		err = 0;
++
++		/* extent read lock */
++		down_read(&EXT4_I(cur)->i_data_sem);
++
++		if (path && ext_depth(cur) != depth) {
++			/* depth was changed. we have to realloc path */
++			kfree(path);
++			path = NULL;
++		}
++
++		/* find extent */
++		p = ext4_ext_find_extent(cur, cbex->ec_block,
++					    path);
++		if (IS_ERR(p)) {
++			err = PTR_ERR(p);
++			CERROR("cannot find extent on inode=%lu.\n",
++				cur->i_ino);
++			/* extent read unlock; cur is released after the loop */
++			up_read(&EXT4_I(cur)->i_data_sem);
++			break;
++		}
++
++		path = p;
++		depth = ext_depth(cur);
++		if (unlikely(path[depth].p_hdr == NULL)) {
++			up_read(&EXT4_I(cur)->i_data_sem);
++			EXT4_ERROR_INODE(cur, "path[%d].p_hdr == NULL",
++					 depth);
++			err = -EIO;
++			break;
++		}
++		/* extent found at the leaf level, NULL when there is none */
++		ex = NULL;
++		ex = path[depth].p_ext;
++
++		next = ext4_ext_next_allocated_block(path);
++		/* extent read unlock */
++		up_read(&EXT4_I(cur)->i_data_sem);
++
++		if (ex) {
++			ext4_lblk_t ee_block =
++				le32_to_cpu(ex->ee_block);
++			ext4_lblk_t ee_end =
++				ee_block + ext4_ext_get_actual_len(ex);
++
++			if (cbex->ec_block < ee_block) {
++				/* extent starts above block: clip at its start */
++				end = ee_block;
++				if (end < cbex->ec_block + cbex->ec_len)
++					cbex->ec_len = end - cbex->ec_block;
++			} else if (ee_end <= cbex->ec_block) {
++				/* extent ends below block: clip at next extent */
++				end = cbex->ec_block + cbex->ec_len;
++				if (end >= next)
++					cbex->ec_len = next - cbex->ec_block;
++			} else if (ee_block <= cbex->ec_block) {
++				/* block lies inside this extent */
++				loff_t		offset, flen;
++
++				cbex->ec_start = ext4_ext_pblock(ex) +
++					(cbex->ec_block - ee_block);
++
++				end = ee_end;
++				if (cbex->ec_block + cbex->ec_len > end)
++					cbex->ec_len = end - cbex->ec_block;
++				/* wait for writeback of this byte range on cur */
++				offset = cbex->ec_block *
++					EXT4_BLOCK_SIZE(inode->i_sb);
++				flen = cbex->ec_len *
++					EXT4_BLOCK_SIZE(inode->i_sb);
++				ext4_snapshot_wait_writeback(cur,
++								offset, flen);
++				*exists = 1;
++				if (flags &&
++				    EXT4_SNAPSHOT_TEST_SPARSE_EXTENT(ex))
++					*flags |= FIEMAP_EXTENT_UNWRITTEN;
++				break;
++			}
++		}
++		ext4_ext_drop_refs(path);
++
++		backup = cur_link_buf;
++		cur_link_buf = next_link_buf;
++		next_link_buf = backup;
++
++		/* get snapshot link data from xattr */
++		new_inode = ext4_snapshot_read_link(cur,
++						       cur_link_buf,
++						       next_link_buf,
++						       true, &err);
++		if (err) {
++			SNAPSHOT_CONSOLE_ERR(err);
++			break;
++		}
++		/* drop the inode just searched and move on to the next one */
++		iput(cur);
++		cur = new_inode;
++	}
++	if (path) {
++		ext4_ext_drop_refs(path);
++		kfree(path);
++	}
++
++	/* drop the inode still held when the loop exited early */
++	if (cur)
++		iput(cur);
++	return err;
++}
++
++/*
++ * struct snapshot_bio_wait
++ *
++ * Completion state shared by the bios that the helpers below submit to
++ * read blocks from a source and write them to a destination.
++ *
++ * sb_wait	wait queue woken when sb_count drops to zero
++ * sb_count	number of submitted bios that have not completed yet
++ * sb_err	error recorded by a failed bio completion, 0 if none
++ *
++ * The owner initializes it with init_snapshot_bio_wait().
++ */
++
++struct snapshot_bio_wait {
++	wait_queue_head_t  sb_wait;
++	atomic_t sb_count;
++	int sb_err;
++};
++
++/* reference mpage_end_io_read() */
++static void snapshot_end_io_read(struct bio *bio, int err)
++{
++	struct snapshot_bio_wait *snapshot_bio_wait = bio->bi_private;
++	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
++	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
++
++	do {
++		struct page *page = bvec->bv_page;
++
++		if (--bvec >= bio->bi_io_vec)
++			prefetchw(&bvec->bv_page->flags);
++
++		if (uptodate) {
++			ClearPageError(page);
++			SetPageUptodate(page);
++		} else {
++			CERROR("page=%p index=%lu devno=%u,%u fail to read\n",
++			       page, page->index, MAJOR(bio->bi_bdev->bd_dev),
++			       MINOR(bio->bi_bdev->bd_dev));
++			SNAPSHOT_CONSOLE_ERR(err ? err : -EIO);
++			ClearPageUptodate(page);
++			SetPageError(page);
++			snapshot_bio_wait->sb_err = (err ? err : -EIO);
++		}
++	} while (bvec >= bio->bi_io_vec);
++
++	if (atomic_dec_and_test(&snapshot_bio_wait->sb_count))
++		wake_up(&snapshot_bio_wait->sb_wait);
++
++	bio_put(bio);
++}
++
++/* reference mpage_end_io_write() */
++static void snapshot_end_io_write(struct bio *bio, int err)
++{
++	struct snapshot_bio_wait *snapshot_bio_wait = bio->bi_private;
++	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
++	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
++
++	do {
++		struct page *page = bvec->bv_page;
++		if (--bvec >= bio->bi_io_vec)
++			prefetchw(&bvec->bv_page->flags);
++
++		if (!uptodate) {
++			SNAPSHOT_CONSOLE_ERR(err ? err : -EIO);
++			CERROR("page=%p index=%lu devno=%u,%u fail to write\n",
++			       page, page->index, MAJOR(bio->bi_bdev->bd_dev),
++			       MINOR(bio->bi_bdev->bd_dev));
++			snapshot_bio_wait->sb_err = (err ? err : -EIO);
++		}
++
++		unlock_page(page);
++		page_cache_release(page);
++	} while (bvec >= bio->bi_io_vec);
++
++	if (atomic_dec_and_test(&snapshot_bio_wait->sb_count))
++		wake_up(&snapshot_bio_wait->sb_wait);
++
++	bio_put(bio);
++}
++
++static int can_be_merged(struct bio *bio, sector_t sector)
++{
++	unsigned int size = bio->bi_size >> 9;
++	return bio->bi_sector + size == sector ? 1 : 0;
++}
++/*
++ * Queue \a nblocks blocks of \a page at physical block \a pblock for \a rw.
++ * The page is merged into *bio_p when that bio ends exactly at the target
++ * sector and accepts the page; otherwise *bio_p is submitted and replaced
++ * by a newly allocated bio holding the page, which is left unsubmitted.
++ *
++ * \retval	0		success
++ * \retval	-ENOMEM		bio allocation failed (*bio_p is NULL then)
++ */
++static int ext4_snapshot_do_bio(struct page *page, struct inode *inode,
++				   int nblocks, ext4_fsblk_t pblock, int rw,
++				   struct bio    **bio_p,
++				   struct snapshot_bio_wait *snapshot_bio_wait,
++				   int alloc_iovecs)
++{
++	unsigned int   blocksize =  inode->i_sb->s_blocksize;
++	int	    sector_bits = inode->i_sb->s_blocksize_bits - 9;
++	sector_t       sector = (sector_t)pblock << sector_bits;
++	struct bio    *bio = *bio_p;
++	struct bio    *tmp_bio = NULL;
++	int	    added;
++
++	if (bio != NULL && can_be_merged(bio, sector) &&
++	    bio_add_page(bio, page, blocksize * nblocks, 0) != 0)
++		return 0;
++	if (bio != NULL) {
++		atomic_inc(&snapshot_bio_wait->sb_count);
++		submit_bio(rw, bio);
++		*bio_p = NULL;
++	}
++	tmp_bio = bio_alloc(GFP_NOIO, alloc_iovecs);
++	if (tmp_bio == NULL) {
++		CERROR("Can't allocate bio\n");
++		return -ENOMEM;
++	}
++	tmp_bio->bi_bdev = inode->i_sb->s_bdev;
++	tmp_bio->bi_sector = sector;
++	tmp_bio->bi_rw = rw;
++	tmp_bio->bi_end_io = (rw == READ) ? snapshot_end_io_read :
++					    snapshot_end_io_write;
++	tmp_bio->bi_private = snapshot_bio_wait;
++	/* keep the side effect out of LASSERT(), which may be compiled out */
++	added = bio_add_page(tmp_bio, page, blocksize * nblocks, 0);
++	LASSERT(added != 0);
++	*bio_p = tmp_bio;
++	return 0;
++}
++
++static void init_snapshot_bio_wait(struct snapshot_bio_wait *snapshot_bio_wait)
++{
++	init_waitqueue_head(&snapshot_bio_wait->sb_wait);
++	atomic_set(&snapshot_bio_wait->sb_count, 0);
++	snapshot_bio_wait->sb_err = 0;
++}
++
++static int ext4_snapshot_submit_last_bio(struct bio *bio,
++					    struct snapshot_bio_wait *snapshot_bio_wait)
++{
++	atomic_inc(&snapshot_bio_wait->sb_count);
++	submit_bio(bio->bi_rw, bio);
++	wait_event(snapshot_bio_wait->sb_wait,
++		   atomic_read(&snapshot_bio_wait->sb_count) == 0);
++	return snapshot_bio_wait->sb_err;
++}
++
++/*
++ * Copy logical blocks [start_block, end_block) from src_inode (physical
++ * start src) to dest_inode (physical start dest): read pass, then write pass.
++ */
++static int ext4_snapshot_copy_page(struct inode *src_inode,
++				      struct inode *dest_inode,
++				      ext4_lblk_t start_block,
++				      ext4_lblk_t end_block,
++				      ext4_fsblk_t src,
++				      ext4_fsblk_t dest)
++{
++	struct super_block *sb = dest_inode->i_sb;
++	int i, j = 0, n = 0;
++	unsigned int   blocksize =  sb->s_blocksize;
++	int blocks_per_page = PAGE_CACHE_SIZE / blocksize;
++	struct snapshot_bio_wait snapshot_bio_wait;
++	struct page *src_page = NULL;
++	bool lock = false;
++	int nblocks;
++	struct bio    *bio = NULL;
++	int rc = 0;
++	struct page **pages;
++
++	pages = kmalloc(sizeof(struct page *) * (end_block - start_block),
++			GFP_NOFS);
++	if (!pages) {
++		CERROR("fail to alloc pages\n");
++		SNAPSHOT_CONSOLE_ERR(-ENOMEM);
++		return -ENOMEM;
++	}
++	init_snapshot_bio_wait(&snapshot_bio_wait);
++	for (i = start_block; i < end_block;) {
++		int pnum = (i / blocks_per_page);
++		int poff = (i % blocks_per_page);
++		nblocks  = blocks_per_page - poff;
++
++		src_page = find_get_page(src_inode->i_mapping, pnum);
++		if (src_page == NULL) {
++		retry:
++			src_page = find_or_create_page(src_inode->i_mapping,
++						       pnum,
++						       GFP_NOFS | __GFP_HIGHMEM);
++			if (unlikely(src_page == NULL)) {
++				CERROR("fail to get src_page "
++				       "for src_inode=%lu\n",
++				       src_inode->i_ino);
++				rc =  -ENOMEM;
++				/* reads submitted earlier may be in flight */
++				goto out_bio_wait;
++			}
++			lock = true;
++		} else if (!PageLocked(src_page)) {
++			CDEBUG(D_INODE, "inode=%lu page is not "
++			       "locked src_page =%p, "
++			       "flags=%ld src_page->index=%lu\n",
++			       src_inode->i_ino, src_page,
++			       src_page->flags, src_page->index);
++			page_cache_release(src_page);
++			goto retry;
++		}
++		wait_on_page_writeback(src_page);
++		/* record the page first so every error path releases it */
++		pages[j++] = src_page;
++
++		if (!PageUptodate(src_page)) {
++			CDEBUG(D_INODE, "inode=%lu NOT on cache src_page =%p, "
++			       "flags=%ld src_page->index=%lu\n",
++			       src_inode->i_ino, src_page,
++			       src_page->flags, src_page->index);
++
++			rc = ext4_snapshot_do_bio(src_page,
++						     src_inode, nblocks,
++						     src + (i - start_block),
++						     READ, &bio,
++						     &snapshot_bio_wait,
++						     end_block - i);
++			if (rc < 0) {
++				SNAPSHOT_CONSOLE_ERR(rc);
++				goto out_bio_wait;
++			}
++		}
++		i += nblocks;
++	}
++	if (bio != NULL) {
++		rc = ext4_snapshot_submit_last_bio(bio, &snapshot_bio_wait);
++		if (rc) {
++			CERROR("fail to read bio "
++			       "rc =%d src_inode=%lu\n",
++			       rc, src_inode->i_ino);
++			SNAPSHOT_CONSOLE_ERR(rc);
++			goto out_src_release;
++		}
++	}
++	init_snapshot_bio_wait(&snapshot_bio_wait);
++	bio = NULL;
++
++	for (i = start_block; i < end_block;) {
++		void *src_addr = NULL, *dest_addr = NULL;
++		struct page *src_page = NULL, *dest_page = NULL;
++		int pnum = (i / blocks_per_page);
++		int poff = (i % blocks_per_page);
++		nblocks  = blocks_per_page - poff;
++
++		LASSERT(n < j);
++		src_page = pages[n++];
++
++		LASSERT(PageUptodate(src_page) && !PageError(src_page));
++		src_addr = kmap(src_page) + (poff * sb->s_blocksize);
++
++		/* get destination block */
++		dest_page = find_or_create_page(dest_inode->i_mapping, pnum,
++						GFP_NOFS | __GFP_HIGHMEM);
++		if (unlikely(dest_page == NULL)) {
++			CERROR("fail to get dest_page for dest_inode=%lu\n",
++			       dest_inode->i_ino);
++			kunmap(src_page);
++			if (lock)
++				unlock_page(src_page);
++			page_cache_release(src_page);
++			rc = -ENOMEM;
++			goto out_bio_wait;
++		}
++		LASSERT(src_page->index == dest_page->index);
++		wait_on_page_writeback(dest_page);
++		dest_addr = kmap(dest_page) + (poff * blocksize);
++
++		memcpy(dest_addr, src_addr, nblocks * blocksize);
++
++		kunmap(dest_page);
++		kunmap(src_page);
++
++		if (lock)
++			unlock_page(src_page);
++		page_cache_release(src_page);
++
++		/* write */
++		SetPageUptodate(dest_page);
++		rc = ext4_snapshot_do_bio(dest_page, dest_inode,
++					     nblocks, dest + (i - start_block),
++					     WRITE, &bio, &snapshot_bio_wait,
++					     end_block - i);
++		if (rc) {
++			unlock_page(dest_page);
++			page_cache_release(dest_page);
++			SNAPSHOT_CONSOLE_ERR(rc);
++			goto out_bio_wait;
++		}
++		i += nblocks;
++	}
++	if (bio != NULL) {
++		rc = ext4_snapshot_submit_last_bio(bio, &snapshot_bio_wait);
++		if (rc) {
++			CERROR("fail to write bio "
++			       "rc =%d src_inode=%lu\n",
++			       rc, src_inode->i_ino);
++			SNAPSHOT_CONSOLE_ERR(rc);
++		}
++	}
++	kfree(pages);
++	return rc;
++
++out_bio_wait:
++	wait_event(snapshot_bio_wait.sb_wait,
++		   atomic_read(&snapshot_bio_wait.sb_count) == 0);
++	if (bio)
++		bio_put(bio);
++out_src_release:
++	while (n < j) {
++		src_page = pages[n++];
++		if (lock)
++			unlock_page(src_page);
++		page_cache_release(src_page);
++	}
++	kfree(pages);
++	return rc;
++}
++
++static int ext4_snapshot_copy_data(struct inode *src_inode,
++				      struct inode *dest_inode,
++				      ext4_lblk_t start_block,
++				      ext4_fsblk_t src,
++				      ext4_fsblk_t dest,
++				      unsigned long total_count)
++{
++	int blocks_per_page = PAGE_CACHE_SIZE / src_inode->i_sb->s_blocksize;
++	int count;
++	int rc = 0;
++
++	while (total_count > 0) {
++		if (total_count <=  BIO_MAX_PAGES * blocks_per_page)
++			return ext4_snapshot_copy_page(src_inode,
++							  dest_inode,
++							  start_block,
++							  start_block + total_count,
++							  src, dest);
++		else {
++			count = BIO_MAX_PAGES * blocks_per_page;
++			rc =  ext4_snapshot_copy_page(src_inode,
++							 dest_inode,
++							 start_block,
++							 start_block + count,
++							 src, dest);
++			if (rc)
++				return rc;
++			total_count -= count;
++			start_block += count;
++			src += count;
++			dest += count;
++		}
++	}
++	return 0;
++}
++
++static int ext4_snapshot_do_copy_extent(handle_t *handle,
++					   struct inode *src_inode,
++					   struct inode *inode,
++					   struct ext4_ext_path *path,
++					   ext4_lblk_t *start_block,
++					   ext4_lblk_t end_block,
++					   ext4_fsblk_t *start_pblock)
++{
++	struct ext4_extent newext;
++	struct ext4_allocation_request ar;
++	int err = 0;
++	ext4_fsblk_t newblock;
++	ext4_lblk_t copy_blocks = (end_block - *start_block);
++
++	/* reference ext4_ext_get_blocks() */
++	ar.lleft = *start_block;
++	err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
++	if (err)
++		return err;
++
++	ar.lright = *start_block;
++	err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright);
++	if (err)
++		return err;
++
++	ar.inode = inode;
++	ar.goal = ext4_ext_find_goal(inode, path, *start_block);
++	ar.logical = *start_block;
++	ar.len = copy_blocks;
++	ar.flags = EXT4_MB_HINT_DATA;
++
++	newblock = ext4_mb_new_blocks(handle, &ar, &err);
++	if (!newblock) {
++		CERROR("fail to alloc blocks "
++		       "goal=%llu len=%u inode=%lu err=%d\n",
++		       ar.goal, ar.len, inode->i_ino, err);
++		return err;
++	}
++	newext.ee_block = cpu_to_le32(*start_block);
++	ext4_ext_store_pblock(&newext, newblock);
++	newext.ee_len = cpu_to_le16(ar.len);
++
++	err = ext4_snapshot_copy_data(src_inode, inode,
++					 *start_block,
++					 *start_pblock, newblock,
++					 ar.len);
++	if (err)
++		goto out_free;
++
++	err = ext4_ext_insert_extent(handle, inode, path, &newext, 0);
++	if (err) {
++		CERROR("fail to insert extent inode=%lu err=%d\n",
++		       inode->i_ino, err);
++		goto out_free;
++	}
++
++	*start_block += ar.len;
++	*start_pblock += ar.len;
++
++ out_free:
++	if (err) {
++		ext4_discard_preallocations(inode);
++		ext4_free_blocks(handle, inode, newblock, ar.len, 0);
++	}
++
++	return err;
++}
++
++static int ext4_snapshot_copy_extent(handle_t *handle,
++					struct inode *src_inode,
++					struct inode *inode,
++					struct ext4_ext_path *path,
++					ext4_lblk_t start_block,
++					ext4_lblk_t end_block,
++					ext4_fsblk_t start_pblock)
++{
++	int err = 0;
++	unsigned int credits;
++
++	while (start_block < end_block) {
++		credits = ext4_chunk_trans_blocks(inode, end_block - start_block);
++		if (unlikely(!ext4_handle_has_enough_credits(handle, credits))) {
++			err = ext4_journal_extend(handle, credits);
++			if (err < 0) {
++				CERROR("couldn't extend journal inode=%lu "
++				       "handle=%p need=%d has=%d err=%d\n",
++				       inode->i_ino, handle, credits,
++				       handle->h_buffer_credits, err);
++				return err;
++			} else if (err != 0) {
++				err = ext4_journal_restart(handle, credits);
++				if (err) {
++					/*
++					 * This should never happen.
++					 * It may panic with ext4_journal_stop()
++					 * if ext4_journal_restart() fails.
++					 */
++					CERROR("couldn't extend and restart "
++					       "journal inode=%lu handle=%p "
++					       "need=%d err=%d\n",
++					       inode->i_ino, handle, credits,
++					       err);
++					return err;
++				}
++			}
++		}
++		err = ext4_snapshot_do_copy_extent(handle,
++						      src_inode, inode,
++						      path, &start_block,
++						      end_block,
++						      &start_pblock);
++		if (err)
++			return err;
++	}
++	return err;
++}
++
++static int ext4_snapshot_insert_sparse_extent(handle_t *handle,
++						 struct inode *inode,
++						 struct ext4_ext_path *path,
++						 ext4_lblk_t start_block,
++						 ext4_lblk_t end_block)
++{
++	struct ext4_extent newext;
++	int err = 0;
++	unsigned int credits;
++	ext4_lblk_t copy_blocks = (end_block - start_block);
++
++	credits = ext4_chunk_trans_blocks(inode, 0);
++	if (unlikely(!ext4_handle_has_enough_credits(handle, credits))) {
++		err = ext4_journal_extend(handle, credits);
++		if (err < 0) {
++			CERROR("couldn't extend journal "
++			       "inode=%lu handle=%p need=%d has=%d err=%d\n",
++			       inode->i_ino, handle, credits,
++			       handle->h_buffer_credits, err);
++			return err;
++		} else if (err != 0) {
++			err = ext4_journal_restart(handle, credits);
++			if (err) {
++				/* This should never happen.
++				 * It may panic with ext4_journal_stop() if
++				 * ext4_journal_restart() fails.
++				 */
++				CERROR("couldn't extend and restart journal "
++				       "inode=%lu handle=%p need=%d err=%d\n",
++				       inode->i_ino, handle, credits,
++				       err);
++
++				return err;
++			}
++		}
++	}
++
++	/* reference ext4_ext_get_blocks() */
++	newext.ee_block = cpu_to_le32(start_block);
++	newext.ee_len = cpu_to_le16(copy_blocks);
++
++	EXT4_SNAPSHOT_SET_SPARSE_EXTENT(&newext);
++
++	err = ext4_ext_insert_extent(handle, inode, path, &newext, 0);
++	if (err)
++		CERROR("fail to insert extent inode=%lu err=%d\n",
++		       inode->i_ino, err);
++	return err;
++}
++
++static int ext4_snapshot_do_copy(handle_t *handle,
++				    struct inode *src_inode,
++				    struct inode *inode,
++				    ext4_lblk_t start_block,
++				    ext4_lblk_t end_block,
++				    ext4_fsblk_t start_pblock,
++				    bool sparse, bool sync_journal)
++{
++	ext4_lblk_t block = start_block;
++	ext4_fsblk_t pblock = start_pblock;
++	struct ext4_ext_path *path = NULL, *p = NULL;
++	int depth = 0, err = 0;
++	int block_copy = 0;
++
++	down_write(&EXT4_I(inode)->i_data_sem);
++	while (block < end_block) {
++		struct ext4_extent *ex = NULL;
++		ext4_lblk_t copy_start, copy_end, next;
++		ext4_fsblk_t  copy_pstart;
++
++		if (path && ext_depth(inode) != depth) {
++			/* depth was changed. we have to realloc path */
++			kfree(path);
++			path = NULL;
++		}
++
++		p = ext4_ext_find_extent(inode, block, path);
++		if (IS_ERR(p)) {
++			err = PTR_ERR(p);
++			CERROR("cannot find extent on inode=%lu err=%d\n",
++			       inode->i_ino, err);
++
++			break;
++		}
++
++		path = p;
++		depth = ext_depth(inode);
++		if (unlikely(path[depth].p_hdr == NULL)) {
++			EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL",
++					 depth);
++			err = -EIO;
++			break;
++		}
++		ex = path[depth].p_ext;
++		next = ext4_ext_next_allocated_block(path);
++		/*  reference ext4_ext_walk_space() */
++		if (!ex) {
++			/* there is no extent.
++			 * so try to copy all */
++			copy_start = block;
++			copy_end = end_block;
++			copy_pstart = pblock;
++		} else {
++			ext4_lblk_t ee_block =
++				le32_to_cpu(ex->ee_block);
++			ext4_lblk_t ee_end =
++				ee_block + ext4_ext_get_actual_len(ex);
++
++			if (block < ee_block) {
++				/* need to copy before found extent */
++				copy_start = block;
++				copy_end = ee_block;
++				copy_pstart = pblock;
++			} else if (ee_end <= block) {
++				/* need to copy after found extent */
++				copy_start = block;
++				copy_end = end_block;
++				copy_pstart = pblock;
++			} else if (ee_end < end_block) {
++				/* some part of requested space is covered
++				 * by found extent */
++				copy_start = ee_end;
++				copy_end = end_block;
++				copy_pstart = pblock + (copy_start - block);
++			} else
++				break;
++
++		}
++		if (next <= copy_end)
++			copy_end = next;
++		if (end_block < copy_end)
++			copy_end = end_block;
++		if (copy_start >= copy_end)
++			goto skip_copy;
++
++		if (sparse) {
++			err = ext4_snapshot_insert_sparse_extent(handle,
++								    inode, path,
++								    copy_start,
++								    copy_end);
++		} else {
++			err = ext4_snapshot_copy_extent(handle,
++							   src_inode,
++							   inode, path,
++							   copy_start,
++							   copy_end,
++							   copy_pstart);
++			block_copy++;
++		}
++	skip_copy:
++		ext4_ext_drop_refs(path);
++		if (err)
++			break;
++
++		pblock += (copy_end - block);
++		block = copy_end;
++	}
++	up_write(&EXT4_I(inode)->i_data_sem);
++
++	if (path) {
++		ext4_ext_drop_refs(path);
++		kfree(path);
++	}
++	if (err == 0 && sync_journal && block_copy)
++		ext4_handle_sync(handle);
++
++	return err;
++}
++
++/*
++ * ext4_snapshot_copy_sparse
++ *
++ *  insert sparse extents to old snapshot file.
++ *
++ * \param[in]	inode		inode of snapshot file
++ * \param[in]	start		start lblock
++ * \param[in]	end		end lblock
++ *
++ * \retval	0		success
++ * \retval	less than 0	failure (-errno)
++ */
++static int ext4_snapshot_copy_sparse(struct inode *inode,
++					ext4_lblk_t start,
++					ext4_lblk_t end)
++{
++	handle_t *handle;
++	struct inode *old_inode;
++	struct super_block *old_sb;
++	struct ext4_snapshot_link link, next_link;
++	ext4_lblk_t max;
++	int credits, err;
++	bool clear_link = false;
++
++	err = ext4_snapshot_get_link(inode, &link);
++	if (err) {
++		SNAPSHOT_CONSOLE_ERR(err);
++		credits = 1;		/* iflag only */
++		goto clear_snap;
++	}
++
++	/* get snapshot old link data & old inode from xattr */
++	old_inode = ext4_snapshot_read_link(inode, &link,
++					       &next_link, false, &err);
++	if (err == 0 && old_inode == NULL) {
++		CERROR("invalid orig link link.old_ino == 0,"
++		       " inode=%lu\n", inode->i_ino);
++		err = -ENOENT;
++	}
++	if (err) {
++		SNAPSHOT_CONSOLE_ERR(err);
++		credits = ext4_calc_snapshot_link_credits(
++				inode, SNAPSHOT_CLEAR_LINK_OP);
++		clear_link = true;
++		goto clear_snap;
++	}
++	old_sb = old_inode->i_sb;
++
++	/* ignore block bigger than file size */
++	max = (old_inode->i_size + EXT4_BLOCK_SIZE(old_sb) - 1)
++		>> EXT4_BLOCK_SIZE_BITS(old_sb);
++	if (end > max)
++		end = max;
++	if (start >= end)
++		goto out;
++
++	/* start journal */
++	credits = ext4_ext_calc_credits_for_insert(old_inode, NULL)
++			+ EXT4_ALLOC_NEEDED + 1;
++	handle = ext4_journal_start(old_inode, credits);
++	if (IS_ERR(handle)) {
++		err = PTR_ERR(handle);
++		SNAPSHOT_CONSOLE_ERR(err);
++		goto out;
++	}
++
++	err = ext4_snapshot_do_copy(handle, inode,
++				       old_inode, start,
++				       end, 0, true, false);
++
++	ext4_journal_stop(handle);
++out:
++	iput(old_inode);
++	return err;
++
++/* When an error occurs, change the inode to no snapshot,
++ * and continue the process. */
++clear_snap:
++	handle = ext4_journal_start(inode, credits);
++	if (IS_ERR(handle)) {
++		err = PTR_ERR(handle);
++		SNAPSHOT_CONSOLE_ERR(err);
++		return err;
++	}
++	if (clear_link)
++		ext4_snapshot_del_link(handle, inode);
++	EXT4_CLEAR_OST_SNAPSHOT_FLAGS(inode);
++	ext4_mark_inode_dirty(handle, inode);
++	ext4_journal_stop(handle);
++	return 0;
++}
++
++/* Copy every mapped extent of inode within [start_block, end_block) to
++ * old_snap via ext4_snapshot_do_copy(); returns 0 or the first -errno. */
++int ext4_snapshot_copy_blocks(handle_t *handle,
++				 struct inode *inode,
++				 struct inode *old_snap,
++				 ext4_lblk_t start_block,
++				 ext4_lblk_t end_block,
++				 bool sync_journal)
++{
++	int depth = 0, err = 0;
++	ext4_lblk_t block = start_block;
++	struct ext4_ext_path *path = NULL, *p = NULL;
++
++	while (block < end_block && block != EXT_MAX_BLOCKS) {
++		struct ext4_extent *ex = NULL;
++		ext4_lblk_t ee_block, ee_end, next;
++		ext4_lblk_t copy_start, copy_end;
++		ext4_fsblk_t  copy_pstart;
++		bool sparse;
++
++		down_read(&EXT4_I(inode)->i_data_sem);
++
++		if (path && ext_depth(inode) != depth) {
++			/* depth was changed. we have to realloc path */
++			kfree(path);
++			path = NULL;
++		}
++
++		p = ext4_ext_find_extent(inode, block, path);
++		if (IS_ERR(p)) {
++			err = PTR_ERR(p);
++			CERROR("cannot find extent on inode=%lu.\n",
++			       inode->i_ino);
++			up_read(&EXT4_I(inode)->i_data_sem);
++			break;
++		}
++
++		path = p;
++		depth = ext_depth(inode);
++		if (unlikely(path[depth].p_hdr == NULL)) {
++			up_read(&EXT4_I(inode)->i_data_sem);
++			EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL",
++					 depth);
++			err = -EIO;
++			break;
++		}
++		ex = path[depth].p_ext;
++		next = ext4_ext_next_allocated_block(path);
++		if (!ex) {
++			up_read(&EXT4_I(inode)->i_data_sem);
++			/* there is no extent: nothing to copy */
++			break;
++		}
++
++		ee_block = le32_to_cpu(ex->ee_block);
++		ee_end = ee_block + ext4_ext_get_actual_len(ex);
++
++		if ((block < ee_block) && (ee_block < end_block)) {
++			copy_start = ee_block;
++			copy_pstart = ext4_ext_pblock(ex);
++		} else if ((ee_block <= block) &&
++			  (block < ee_end)) {
++			copy_start = block;
++			copy_pstart = ext4_ext_pblock(ex)
++				+ (block - ee_block);
++		} else {
++			up_read(&EXT4_I(inode)->i_data_sem);
++			/* no extent in the target range: nothing to copy */
++			break;
++		}
++		copy_end = (end_block < ee_end) ? end_block : ee_end;
++		/* ex comes from path, whose refs are dropped next; evaluate
++		 * the sparse flag while path and i_data_sem are still held */
++		sparse = EXT4_SNAPSHOT_TEST_SPARSE_EXTENT(ex);
++		ext4_ext_drop_refs(path);
++		up_read(&EXT4_I(inode)->i_data_sem);
++
++		err = ext4_snapshot_do_copy(handle, inode, old_snap,
++					       copy_start, copy_end,
++					       copy_pstart,
++					       sparse,
++					       sync_journal);
++		if (err)
++			break;
++
++		block = next;
++	}
++
++	if (path) {
++		ext4_ext_drop_refs(path);
++		kfree(path);
++	}
++	return err;
++}
++
++static int ext4_snapshot_copy_writeblocks(struct inode *inode,
++					     ext4_lblk_t start,
++					     ext4_lblk_t end)
++{
++	handle_t *handle;
++	struct ext4_snapshot_link link, next_link;
++	struct inode *old_inode;
++	struct super_block *old_sb;
++	ext4_lblk_t max;
++	int credits, err;
++	bool clear_link = false;
++
++	err = ext4_snapshot_get_link(inode, &link);
++	if (err) {
++		SNAPSHOT_CONSOLE_ERR(err);
++		credits = 1;		/* iflag only */
++		goto clear_snap;
++	}
++
++	/* get snapshot old link data & old inode from xattr */
++	old_inode = ext4_snapshot_read_link(inode, &link,
++					    &next_link, false, &err);
++	if (err == 0 && old_inode == NULL) {
++		CERROR("invalid orig link link.old_ino == 0,"
++		       " inode=%lu\n", inode->i_ino);
++		err = -ENOENT;
++	}
++	if (err) {
++		SNAPSHOT_CONSOLE_ERR(err);
++		credits = ext4_calc_snapshot_link_credits(
++				inode, SNAPSHOT_CLEAR_LINK_OP);
++		clear_link = true;
++		goto clear_snap;
++	}
++	old_sb = old_inode->i_sb;
++
++	/* ignore block bigger than file size */
++	max = (old_inode->i_size + EXT4_BLOCK_SIZE(old_sb) - 1)
++		>> EXT4_BLOCK_SIZE_BITS(old_sb);
++	if (end > max)
++		end = max;
++	if (start >= end)
++		goto out;
++
++	/* start journal */
++	credits = ext4_ext_calc_credits_for_insert(old_inode, NULL)
++			+ EXT4_ALLOC_NEEDED + 1;
++	handle = ext4_journal_start(old_inode, credits);
++	if (IS_ERR(handle)) {
++		err = PTR_ERR(handle);
++		SNAPSHOT_CONSOLE_ERR(err);
++		goto out;
++	}
++
++	err = ext4_snapshot_copy_blocks(handle, inode,
++					   old_inode, start, end,
++					   true);
++	ext4_journal_stop(handle);
++out:
++	iput(old_inode);
++	return err;
++
++/* When an error occurs, change the inode to no snapshot,
++ * and continue the process. */
++clear_snap:
++	handle = ext4_journal_start(inode, credits);
++	if (IS_ERR(handle)) {
++		err = PTR_ERR(handle);
++		SNAPSHOT_CONSOLE_ERR(err);
++		return err;
++	}
++	if (clear_link)
++		ext4_snapshot_del_link(handle, inode);
++	EXT4_CLEAR_OST_SNAPSHOT_FLAGS(inode);
++	ext4_mark_inode_dirty(handle, inode);
++	ext4_journal_stop(handle);
++	return 0;
++}
++
++/* Walk [start_block, end_block) of inode; pass each hole (as sparse) and each
++ * mapped piece (with its physical start) to ext4_snapshot_do_copy(). */
++int ext4_snapshot_truncate_blocks(handle_t *handle,
++				     struct inode *inode,
++				     struct inode *old_inode,
++				     ext4_lblk_t start_block,
++				     ext4_lblk_t end_block)
++{
++	struct ext4_ext_path *path = NULL, *p = NULL;
++	struct ext4_extent *ex;
++	ext4_lblk_t next, start = 0, end = 0;
++	int exists, depth = 0, err = 0;
++	ext4_lblk_t block = start_block;
++
++	while (block < end_block && block != EXT_MAX_BLOCKS) {
++		/* find extent for this block */
++		down_read(&EXT4_I(inode)->i_data_sem);
++
++		if (path && ext_depth(inode) != depth) {
++			/* depth was changed. we have to realloc path */
++			kfree(path);
++			path = NULL;
++		}
++
++		p = ext4_ext_find_extent(inode, block, path);
++		if (IS_ERR(p)) {
++			err = PTR_ERR(p);
++			CERROR("cannot find extent on inode=%lu.\n",
++			       inode->i_ino);
++			up_read(&EXT4_I(inode)->i_data_sem);
++			break;
++		}
++
++		path = p;
++		depth = ext_depth(inode);
++		if (unlikely(path[depth].p_hdr == NULL)) {
++			up_read(&EXT4_I(inode)->i_data_sem);
++			EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL",
++					 depth);
++			err = -EIO;
++			break;
++		}
++		ex = path[depth].p_ext;
++		next = ext4_ext_next_allocated_block(path);
++		up_read(&EXT4_I(inode)->i_data_sem);
++
++		exists = 0;
++		if (!ex) {
++			/* no extent at all: the whole request is a hole */
++			start = block;
++			end = end_block;
++		} else if (le32_to_cpu(ex->ee_block) > block) {
++			/* need to allocate space before found extent */
++			start = block;
++			end = le32_to_cpu(ex->ee_block);
++			if (end_block < end)
++				end = end_block;
++		} else if (block >= le32_to_cpu(ex->ee_block)
++					+ ext4_ext_get_actual_len(ex)) {
++			/* need to allocate space after found extent */
++			start = block;
++			end = end_block;
++			if (end >= next)
++				end = next;
++		} else if (block >= le32_to_cpu(ex->ee_block)) {
++			/* some part of requested space is covered
++			 * by found extent */
++			start = block;
++			end = le32_to_cpu(ex->ee_block)
++				+ ext4_ext_get_actual_len(ex);
++			if (end_block < end)
++				end = end_block;
++			exists = 1;
++		} else {
++			BUG();
++		}
++		BUG_ON(end <= start);
++
++		if (!exists) {
++			err = ext4_snapshot_do_copy(handle, inode,
++						       old_inode, start,
++						       end, 0, true, false);
++		} else {
++			/* physical block backing 'start', not the extent head */
++			err = ext4_snapshot_do_copy(handle, inode,
++						       old_inode, start,
++						       end,
++						       ext4_ext_pblock(ex) + (start -
++						       le32_to_cpu(ex->ee_block)),
++						       false, false);
++		}
++		ext4_ext_drop_refs(path);
++
++		if (err < 0) {
++			SNAPSHOT_CONSOLE_ERR(err);
++			break;
++		}
++
++		block = end;
++	}
++
++	if (path) {
++		ext4_ext_drop_refs(path);
++		kfree(path);
++	}
++
++	return err;
++}
++
+ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
+ 			ext4_lblk_t num, ext_prepare_callback func,
+ 			void *cbdata)
+@@ -4498,6 +5743,8 @@
+ 	ext4_lblk_t last = block + num;
+ 	int depth, exists, err = 0;
+ 
++	struct bpointers *bp = cbdata;
++
+ 	BUG_ON(func == NULL);
+ 	BUG_ON(inode == NULL);
+ 
+@@ -4569,10 +5816,42 @@
+ 			cbex.ec_block = start;
+ 			cbex.ec_len = end - start;
+ 			cbex.ec_start = 0;
++			if (EXT4_TEST_OST_SNAPSHOT_FILE(inode)
++			    && (bp->create == READ_OP)) {
++				/* if READ process && inode is snapshot */
++				err = ext4_snapshot_read(inode, &cbex,
++							    &exists, NULL);
++				if (err < 0)
++					break;
++			} else if (EXT4_TEST_OST_SNAPSHOT_ORIG(inode)
++				   && (bp->create == WRITE_COPY_OP)) {
++				/* snapshot copy sparse */
++				err = ext4_snapshot_copy_sparse(
++						inode, start, end);
++				if (err < 0)
++					break;
++			}
+ 		} else {
+-			cbex.ec_block = le32_to_cpu(ex->ee_block);
+-			cbex.ec_len = ext4_ext_get_actual_len(ex);
+-			cbex.ec_start = ext4_ext_pblock(ex);
++			if (EXT4_TEST_OST_SNAPSHOT_ORIG(inode)
++			    && (bp->create == WRITE_COPY_OP)) {
++				/* snapshot copy on write */
++				err = ext4_snapshot_copy_writeblocks(
++						inode, start, end);
++				if (err)
++					break;
++			}
++
++			if (EXT4_TEST_OST_SNAPSHOT_FILE(inode) &&
++			    bp->create == READ_OP &&
++			    EXT4_SNAPSHOT_TEST_SPARSE_EXTENT(ex)) {
++				cbex.ec_block = start;
++				cbex.ec_len = end - start;
++				cbex.ec_start = 0;
++			} else {
++				cbex.ec_block = le32_to_cpu(ex->ee_block);
++				cbex.ec_len = ext4_ext_get_actual_len(ex);
++				cbex.ec_start = ext4_ext_pblock(ex);
++			}
+ 		}
+ 
+ 		if (unlikely(cbex.ec_len == 0)) {
+diff -urN -x .svn linux-stage.org/fs/ext4/inode.c linux-stage/fs/ext4/inode.c
+--- linux-stage.org/fs/ext4/inode.c	2018-08-31 20:53:57.000000000 +0900
++++ linux-stage/fs/ext4/inode.c	2018-09-03 15:18:02.000000000 +0900
+@@ -40,9 +40,9 @@
+ #include <linux/workqueue.h>
+ 
+ #include "ext4_jbd2.h"
+-#include "xattr.h"
+ #include "acl.h"
+ #include "ext4_extents.h"
++#include "snapshot.h"
+ 
+ #include <trace/events/ext4.h>
+ 
+@@ -5329,7 +5329,10 @@
+ {
+ 	unsigned int flags = EXT4_I(inode)->i_flags;
+ 
+-	inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
++	inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|
++			    EXT4_SNAPSHOT_SHARE_FL|
++			    EXT4_SNAPSHOT_SP_FL|
++			    EXT4_SNAPSHOT_FL);
+ 	if (flags & EXT4_SYNC_FL)
+ 		inode->i_flags |= S_SYNC;
+ 	if (flags & EXT4_APPEND_FL)
+@@ -5340,6 +5343,13 @@
+ 		inode->i_flags |= S_NOATIME;
+ 	if (flags & EXT4_DIRSYNC_FL)
+ 		inode->i_flags |= S_DIRSYNC;
++	if (flags & EXT4_SNAPSHOT_SHARE_FL)
++		inode->i_flags |= EXT4_SNAPSHOT_SHARE_FL;
++	if (flags & EXT4_SNAPSHOT_SP_FL)
++		inode->i_flags |= EXT4_SNAPSHOT_SP_FL;
++	if (flags & EXT4_SNAPSHOT_FL)
++		inode->i_flags |= EXT4_SNAPSHOT_FL;
++
+ }
+ 
+ /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
+@@ -5352,8 +5362,12 @@
+ 		vfs_fl = ei->vfs_inode.i_flags;
+ 		old_fl = ei->i_flags;
+ 		new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
+-				EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|
+-				EXT4_DIRSYNC_FL);
++				    EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|
++				    EXT4_DIRSYNC_FL |
++				    EXT4_SNAPSHOT_SHARE_FL|
++				    EXT4_SNAPSHOT_SP_FL|
++				    EXT4_SNAPSHOT_FL);
++
+ 		if (vfs_fl & S_SYNC)
+ 			new_fl |= EXT4_SYNC_FL;
+ 		if (vfs_fl & S_APPEND)
+@@ -5364,6 +5378,12 @@
+ 			new_fl |= EXT4_NOATIME_FL;
+ 		if (vfs_fl & S_DIRSYNC)
+ 			new_fl |= EXT4_DIRSYNC_FL;
++		if (vfs_fl & EXT4_SNAPSHOT_SHARE_FL)
++			new_fl |= EXT4_SNAPSHOT_SHARE_FL;
++		if (vfs_fl & EXT4_SNAPSHOT_SP_FL)
++			new_fl |= EXT4_SNAPSHOT_SP_FL;
++		if (vfs_fl & EXT4_SNAPSHOT_FL)
++			new_fl |= EXT4_SNAPSHOT_FL;
+ 	} while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl);
+ }
+ 
+@@ -6061,6 +6081,7 @@
+ 
+ 	return ret;
+ }
++EXPORT_SYMBOL(ext4_meta_trans_blocks);
+ 
+ /*
+  * Calulate the total number of credits to reserve to fit
+@@ -6500,3 +6521,26 @@
+ 	return rc;
+ }
+ EXPORT_SYMBOL(ext4_map_inode_page);
++
++int ext4_snapshot_orphan_truncate(struct inode *inode,
++				  struct ext4_snapshot_gen_lock **lock)
++{
++	handle_t *handle;
++	int err;
++
++	err = ext4_snapshot_lock(inode, false, lock);
++	if ((err != 0) || (*lock == NULL))
++		return err;
++
++	handle = ext4_journal_start(inode, blocks_for_truncate(inode));
++	if (IS_ERR(handle)) {
++		err = PTR_ERR(handle);
++		return err;
++	}
++
++	err = ext4_snapshot_punch(handle, inode, inode->i_size,
++				  ~0ULL /* = OBD_OBJECT_EOF */);
++
++	ext4_journal_stop(handle);
++	return err;
++}
+diff -urN -x .svn linux-stage.org/fs/ext4/snapshot.c linux-stage/fs/ext4/snapshot.c
+--- linux-stage.org/fs/ext4/snapshot.c	1970-01-01 09:00:00.000000000 +0900
++++ linux-stage/fs/ext4/snapshot.c	2018-11-01 11:49:59.000000000 +0900
+@@ -0,0 +1,1222 @@
++/*
++ * GPL HEADER START
++ *
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 only,
++ * as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License version 2 for more details.  A copy is
++ * included in the COPYING file that accompanied this code.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++ *
++ * GPL HEADER END
++ */
++/*
++ *   Copyright(c) 2016-2018 FUJITSU LIMITED.
++ *   All rights reserved.
++ */
++
++#include <linux/init.h>
++#include <linux/fs.h>
++#include <linux/slab.h>
++#include <linux/mbcache.h>
++#include "linux/quotaops.h"
++#include <linux/rwsem.h>
++#include <linux/list.h>
++#include "ext4_jbd2.h"
++#include "ext4.h"
++#include "acl.h"
++#include "snapshot.h"
++#include "snapshot_debug.h"
++
++
++/* snapshot generation lock list head */
++static LIST_HEAD(snap_lock_list);
++
++/* mutex object for snapshot generation lock list */
++static struct mutex	snap_list_mutex;
++
++/* snapshot lock timeout var */
++static int	snapshot_lock_timeout = SNAPSHOT_LOCK_TIMEOUT;
++
++
++/* same as lustre/include/lustre/lustre_user.h */
++struct lu_fid {
++	/**
++	 * FID sequence. Sequence is a unit of migration: all files (objects)
++	 * with FIDs from a given sequence are stored on the same server.
++	 * Lustre should support 2^64 objects, so even if each sequence
++	 * has only a single object we can still enumerate 2^64 objects.
++	 **/
++	__u64 f_seq;
++	/* FID number within sequence. */
++	__u32 f_oid;
++	/**
++	 * FID version, used to distinguish different versions (in the sense
++	 * of snapshots, etc.) of the same file system object. Not currently
++	 * used.
++	 **/
++	__u32 f_ver;
++};
++
++/* same as lustre/include/lu_object.h */
++struct lu_buf {
++	void   *lb_buf;
++	ssize_t lb_len;
++};
++
++/* snapshot request data */
++struct snapshot_list_data {
++	struct lu_fid fid;
++	unsigned long ost_ino;
++};
++
++#define IOC_SNAPSHOT_LIST_MAX 256
++
++struct snapshot_list_buf {
++	struct snapshot_list_data list_data[IOC_SNAPSHOT_LIST_MAX];
++	int list_num;
++};
++
++struct lustre_mdt_attrs {
++	/**
++	 * Bitfield for supported data in this structure. From enum lma_compat.
++	 * lma_self_fid and lma_flags are always available.
++	 */
++	__u32   lma_compat;
++	/**
++	 * Per-file incompat feature list. Lustre version should support all
++	 * flags set in this field. The supported feature mask is available in
++	 * LMA_INCOMPAT_SUPP.
++	 */
++	__u32   lma_incompat;
++	/** FID of this inode */
++	struct lu_fid  lma_self_fid;
++};
++
++/*
++ * ext4_get_snapshot_lock_timeout()
++ *
++ * get timeout of snapshot lock wait
++ *
++ * \param[in]	-
++ *
++ * \retval			snapshot lock timeout (sec)
++ */
++int ext4_get_snapshot_lock_timeout(void)
++{
++	/* get the snapshot lock timeout */
++	return snapshot_lock_timeout;
++}
++EXPORT_SYMBOL(ext4_get_snapshot_lock_timeout);
++
++/*
++ * ext4_set_snapshot_lock_timeout()
++ *
++ * set timeout of snapshot lock wait
++ *
++ * \param[in]   tout            snapshot lock timeout (sec)
++ *
++ * \retval      none
++ */
++
++void ext4_set_snapshot_lock_timeout(int tout)
++{
++	snapshot_lock_timeout = tout;
++	return;
++}
++EXPORT_SYMBOL(ext4_set_snapshot_lock_timeout);
++
++/*
++ * ext4_snapshot_get_enable()
++ *
++ * get status of snapshot enable/disable
++ *
++ * \param[in]	sb		super block
++ *
++ * \retval	0		snapshot is disabled
++ * \retval	1		snapshot is enabled
++ */
++int ext4_snapshot_get_enable(struct super_block *sb)
++{
++	/* check the feature bits in super block */
++	return EXT4_HAS_RO_COMPAT_FEATURE(sb,
++			EXT4_FEATURE_RO_COMPAT_SNAPSHOT);
++}
++EXPORT_SYMBOL(ext4_snapshot_get_enable);
++
++/*
++ * ext4_snapshot_set_enable()
++ *
++ * enable snapshot functions
++ *
++ * \param[in]	sb		super block
++ *
++ * \retval	0		success
++ * \retval	less than 0	failure (-errno)
++ */
++int ext4_snapshot_set_enable(struct super_block *sb)
++{
++	handle_t *handle;
++	int err;
++
++	/* start journal */
++	handle = ext4_journal_start_sb(sb, 1);
++	if (IS_ERR(handle)) {
++		err = PTR_ERR(handle);
++		CERROR("fail to start journal err=%d\n", err);
++		goto out;
++	}
++
++	/* get journal */
++	err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
++	if (err) {
++		CERROR("error %d on journal write access\n", err);
++		goto out_stop_journal;
++	}
++
++	/* set */
++	EXT4_SET_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_SNAPSHOT);
++	sb->s_dirt = 1;
++
++	/* mark dirty */
++	err = ext4_handle_dirty_metadata(handle, NULL,
++				      EXT4_SB(sb)->s_sbh);
++	if (err) {
++		CERROR("error %d on handle dirty metadata\n", err);
++		goto out_stop_journal;
++	}
++	/* stop journal */
++	err = ext4_journal_stop(handle);
++	if (err) {
++		CERROR("error %d on journal stop\n", err);
++		goto out;
++	}
++	return 0;
++
++out_stop_journal:
++	ext4_journal_stop(handle);
++out:
++	return err;
++}
++EXPORT_SYMBOL(ext4_snapshot_set_enable);
++
++/*
++ * ext4_snapshot_lock()
++ *
++ * lock between the snapshot generations
++ *
++ * \param[in]	inode		snapshot inode
++ * \param[in]	create		for create snapshot
++ * \param[out]	lock		snapshot lock object
++ *				this data is used at unlock
++ *
++ * \retval	0		success
++ * \retval	less than 0	failure (-errno)
++ */
++int ext4_snapshot_lock(struct inode *inode, bool create,
++			  struct ext4_snapshot_gen_lock **lock)
++{
++	struct ext4_snapshot_gen_lock	*entry;
++	int	err = 0, delay, match_f, count = 0, max_count;
++	unsigned long				org_ino;
++	struct timespec				org_ts;
++	struct ext4_snapshot_link	    link;
++	/* calc lock delay time (in jiffies, at least 1) */
++	delay = HZ / SNAPSHOT_LOCK_FREQ;
++	if (delay == 0)
++		delay = 1;
++	/* calc snapshot lock timeout count (number of retries) */
++	max_count = snapshot_lock_timeout * SNAPSHOT_LOCK_FREQ;
++	/* initialize lock object */
++	*lock = NULL;
++
++	if (!create && !EXT4_TEST_OST_SNAPSHOT(inode))
++		return 0;
++
++	/* not a snapshot file: 1st snapshot creation or snapshot orig file */
++	if (!EXT4_TEST_OST_SNAPSHOT_FILE(inode)) {
++		org_ino = inode->i_ino;
++		org_ts =  EXT4_I(inode)->i_crtime;
++	} else {
++		err = ext4_snapshot_get_link(inode, &link);
++		if (err)
++			goto err_out;
++		org_ino = link.org_ino;
++		org_ts = link.org_ts;
++	}
++
++repeat:
++	/* lock snapshot lock list */
++	mutex_lock(&snap_list_mutex);
++	match_f = 0;
++	/* scan list, and find same lock object from list */
++	list_for_each_entry(entry, &snap_lock_list, list) {
++		/* compare original inode no & its timestamp (org_ts) */
++		if (entry->org == org_ino &&
++		    SNAPSHOT_MATCH_TS(&entry->ts, &org_ts)) {
++			match_f = 1;
++			break;
++		}
++	}
++
++	/* add snapshot lock object into list, if not found in list */
++	if (!match_f) {
++		entry = NULL;
++		if (!create && !EXT4_TEST_OST_SNAPSHOT(inode)) {
++			mutex_unlock(&snap_list_mutex);
++			return 0;
++		}
++		entry = kmalloc(sizeof(*entry), GFP_NOFS);
++		if (entry == NULL) {
++			CERROR("fail to allocate snapshot lock.\n");
++			err = -ENOMEM;
++			/* unlock snapshot lock list */
++			mutex_unlock(&snap_list_mutex);
++			goto err_out;
++		}
++		entry->org = org_ino;
++		entry->ts = org_ts;
++		list_add_tail(&entry->list, &snap_lock_list);
++		/* now snapshot is locking */
++		*lock = entry;
++		/* unlock snapshot lock list */
++		mutex_unlock(&snap_list_mutex);
++		return 0;
++	}
++	/* unlock snapshot lock list */
++	mutex_unlock(&snap_list_mutex);
++
++	/* snapshot is already locked: retry after delay jiffies or time out */
++	count++;
++	if ((max_count > 0) && (count > max_count)) {
++		CWARN("snapshot lock timeout.\n");
++		err = -ETIME;
++		goto err_out;
++	}
++	schedule_timeout_uninterruptible(delay);
++	goto repeat;
++
++err_out:
++	return err;
++}
++
++/*
++ * ext4_snapshot_unlock()
++ *
++ * unlock between the snapshot generations
++ *
++ * \param[in]	lock		snapshot lock object
++ *
++ * \retval	none
++ */
++void ext4_snapshot_unlock(struct ext4_snapshot_gen_lock *lock)
++{
++	LASSERT(lock);
++	/* lock snapshot lock list */
++	mutex_lock(&snap_list_mutex);
++	/* delete lock object from list */
++	list_del(&lock->list);
++	/* unlock snapshot lock list */
++	mutex_unlock(&snap_list_mutex);
++	/* free snapshot lock object */
++	kfree(lock);
++	return;
++}
++EXPORT_SYMBOL(ext4_snapshot_unlock);
++
++struct inode *ext4_snapshot_read_link(struct inode *inode,
++					 struct ext4_snapshot_link *my_link,
++					 struct ext4_snapshot_link *tgt_link,
++					 bool new, int *err)
++{
++	unsigned long tgt_ino;
++	struct inode *tgt_inode;
++	*err = 0;
++	if (new) {
++		if (my_link->new_ino == 0)
++			return NULL;
++		tgt_ino = my_link->new_ino;
++	} else {
++		if (my_link->old_ino == 0)
++			return NULL;
++		tgt_ino = my_link->old_ino;
++	}
++	*err = -ENOENT;
++
++	tgt_inode = ext4_iget(inode->i_sb, tgt_ino);
++	if (IS_ERR(tgt_inode)) {
++		*err = PTR_ERR(tgt_inode);
++		CERROR("fail to get %s snapshot inode. "
++		       "me=%lu tgt_inode=%lu err=%d\n", (new ? "new" : "old"),
++		       inode->i_ino, tgt_ino, *err);
++		return NULL;
++	}
++	if (!EXT4_TEST_OST_SNAPSHOT(tgt_inode)) {
++		CERROR("%s snapshot inode is invalid flag. "
++		       "inode=%lu flag=%lx\n",
++		       (new ? "new" : "old"), tgt_ino,
++		       EXT4_I(tgt_inode)->i_flags);
++		goto err_close;
++	}
++	*err = ext4_snapshot_get_link(tgt_inode, tgt_link);
++	if (*err)
++		goto err_close;
++
++	if ((tgt_link->org_ino != my_link->org_ino) ||
++	    (!SNAPSHOT_MATCH_TS(&tgt_link->org_ts, &my_link->org_ts)) ||
++	    (new && (tgt_link->old_ino != inode->i_ino)) ||
++	    (!new && (tgt_link->new_ino != inode->i_ino))) {
++		CERROR("invalid %s snapshot link. "
++		       "mylink : ino=%lu orig=%lu new=%lu old=%lu ts=%llu "
++		       "tgtlink : ino=%lu orig=%lu new=%lu old=%lu ts=%llu\n",
++		       (new ? "new" : "old"),
++		       inode->i_ino, my_link->org_ino, my_link->new_ino,
++		       my_link->old_ino,
++		       SNAPSHOT_CLTIME(&my_link->org_ts),
++		       tgt_inode->i_ino, tgt_link->org_ino, tgt_link->new_ino,
++		       tgt_link->old_ino, SNAPSHOT_CLTIME(&tgt_link->org_ts));
++		*err = -EXDEV;
++		goto err_close;
++	}
++	return tgt_inode;
++
++ err_close:
++	iput(tgt_inode);
++	return NULL;
++}
++
++/*
++ * ext4_snapshot_clone()
++ *
++ * update the snapshot link information in the xattr
++ * target inodes are the original file, the newly created snapshot
++ * and the previous generation of snapshot.
++ *
++ * \param[in]	snap_inode	new created snapshot inode
++ * \param[in]	orig_inode	snapshot original file inode
++ *
++ * \retval	0		success
++ * \retval	less than 0	failure (retval is error code)
++ */
++
++int ext4_snapshot_clone(struct inode *snap_inode,
++			   struct inode *orig_inode)
++{
++	handle_t		*handle;
++	struct inode *old_inode = NULL;
++	struct ext4_snapshot_link orig_link, snap_link, old_link;
++	loff_t			rb_snap_size, rb_snap_disksize;
++	int	err = 0, err2 = 0, credits;
++
++	rb_snap_size = i_size_read(snap_inode);
++	rb_snap_disksize = EXT4_I(snap_inode)->i_disksize;
++
++	if (!EXT4_TEST_OST_SNAPSHOT(orig_inode)) {
++		orig_link.org_ino = orig_inode->i_ino;
++		orig_link.org_ts  = EXT4_I(orig_inode)->i_crtime;
++		orig_link.new_ino = 0;
++		orig_link.old_ino = 0;
++	} else {
++		err = ext4_snapshot_get_link(orig_inode, &orig_link);
++		if (err) {
++			SNAPSHOT_CONSOLE_ERR(err);
++			return err;
++		}
++	}
++
++	/* get old link */
++	if (orig_link.old_ino) {
++		old_inode = ext4_snapshot_read_link(orig_inode,
++						       &orig_link,
++						       &old_link,
++						       false, &err);
++		if (err) {
++			SNAPSHOT_CONSOLE_ERR(err);
++			return err;
++		}
++	}
++
++	credits = ext4_calc_snapshot_link_credits(orig_inode,
++						     SNAPSHOT_CREATE_OP);
++	/* start journal */
++	handle = ext4_journal_start(snap_inode, credits);
++	if (IS_ERR(handle)) {
++		err = PTR_ERR(handle);
++		CERROR("fail to start snapshot journal. inode=%lu err=%d\n",
++			snap_inode->i_ino, err);
++		SNAPSHOT_CONSOLE_ERR(err);
++		goto out_iput;
++	}
++
++	/* update new snap xattr link  */
++	snap_link.org_ino = orig_inode->i_ino;
++	snap_link.org_ts  = orig_link.org_ts;
++	snap_link.new_ino = orig_inode->i_ino;
++	snap_link.old_ino = orig_link.old_ino;
++
++	err = ext4_snapshot_set_link(handle, snap_inode, &snap_link);
++	if (err) {
++		SNAPSHOT_CONSOLE_ERR(err);
++		goto out_stop_journal;
++	}
++
++	/* update xattr snapshot link of old snap */
++	if (old_inode) {
++		old_link.new_ino = snap_inode->i_ino;
++		err = ext4_snapshot_set_link(handle, old_inode, &old_link);
++		if (err) {
++			SNAPSHOT_CONSOLE_ERR(err);
++			goto out_rb_snap_link;
++		}
++	}
++
++	/* update original xattr link */
++	orig_link.old_ino = snap_inode->i_ino;
++	err = ext4_snapshot_set_link(handle, orig_inode, &orig_link);
++	if (err) {
++		SNAPSHOT_CONSOLE_ERR(err);
++		goto out_rb_old_link;
++	}
++
++	/* set new snapshot size & disksize */
++	i_size_write(snap_inode, i_size_read(orig_inode));
++	EXT4_I(snap_inode)->i_disksize = EXT4_I(orig_inode)->i_disksize;
++	/* set new snapshot i_flags */
++	EXT4_SET_OST_SNAPSHOT_FILE(snap_inode);
++	/* update new snapshot inode */
++	err = ext4_mark_inode_dirty(handle, snap_inode);
++	if (err) {
++		CERROR("fail to dirty new snapshot flags. inode=%lu err=%d\n",
++			snap_inode->i_ino, err);
++		SNAPSHOT_CONSOLE_ERR(err);
++		/* roll-back snapshot size & disksize to the saved values */
++		i_size_write(snap_inode, rb_snap_size);
++		EXT4_I(snap_inode)->i_disksize = rb_snap_disksize;
++		/* clear new snapshot i_flags */
++		EXT4_CLEAR_OST_SNAPSHOT_FLAGS(snap_inode);
++		goto out_rb_orig_link;
++	}
++
++	/* update original i_flags(SNAPSHOT_SHARE_FL)  */
++	if (!(EXT4_TEST_OST_SNAPSHOT_ORIG(orig_inode))) {
++		/* set inode flags (SNAPSHOT_SHARE_FL) */
++		EXT4_SET_OST_SNAPSHOT_ORIG(orig_inode);
++		/* update original inode */
++		err = ext4_mark_inode_dirty(handle, orig_inode);
++		if (err) {
++			SNAPSHOT_CONSOLE_ERR(err);
++			CERROR("fail to dirty orig snapshot flags. "
++				"inode=%lu err=%d\n", orig_inode->i_ino, err);
++			EXT4_CLEAR_OST_SNAPSHOT_FLAGS(orig_inode);
++			goto out_rb_snap_flag;
++		}
++	}
++
++	ext4_journal_stop(handle);
++	if (old_inode)
++		iput(old_inode);
++
++	return 0;
++
++out_rb_snap_flag:
++	/* roll-back snapshot size & disksize to the saved values */
++	i_size_write(snap_inode, rb_snap_size);
++	EXT4_I(snap_inode)->i_disksize = rb_snap_disksize;
++	/* clear snapshot inode i_flags of SNAPSHOT_FLAGS */
++	EXT4_CLEAR_OST_SNAPSHOT_FLAGS(snap_inode);
++	/* write back the rolled-back snapshot inode */
++	err2 = ext4_mark_inode_dirty(handle, snap_inode);
++	if (err2) {
++		SNAPSHOT_CONSOLE_ERR(err2);
++		goto out_stop_journal;
++	}
++out_rb_orig_link:
++	/* roll-back xattr snapshot link of original */
++	if (!(EXT4_TEST_OST_SNAPSHOT_ORIG(orig_inode)))
++		err2 = ext4_snapshot_del_link(handle, orig_inode);
++	else {
++		/* old_inode stays NULL when orig_link.old_ino was 0 */
++		orig_link.old_ino = old_inode ? old_inode->i_ino : 0;
++		err2 = ext4_snapshot_set_link(handle,
++						orig_inode,
++						&orig_link);
++	}
++	if (err2) {
++		SNAPSHOT_CONSOLE_ERR(err2);
++		goto out_stop_journal;
++	}
++out_rb_old_link:
++	if (old_inode) {
++		old_link.new_ino = orig_inode->i_ino;
++		err2 = ext4_snapshot_set_link(handle,
++						old_inode, &old_link);
++		if (err2) {
++			SNAPSHOT_CONSOLE_ERR(err2);
++			goto out_stop_journal;
++		}
++	}
++out_rb_snap_link:
++	/* delete xattr snapshot link of new snapshot */
++	err2 = ext4_snapshot_del_link(handle, snap_inode);
++	if (err2)
++		SNAPSHOT_CONSOLE_ERR(err2);
++out_stop_journal:
++	/* stop journal */
++	ext4_journal_stop(handle);
++out_iput:
++	if (old_inode)
++		iput(old_inode);
++	return err;
++}
++EXPORT_SYMBOL(ext4_snapshot_clone);
++
++int ext4_snapshot_punch(handle_t *handle, struct inode *inode,
++			   __u64 start, __u64 end)
++{
++	struct ext4_snapshot_link my_link, old_link;
++	int err, credits;
++	struct inode *old_snap = NULL;
++	ext4_lblk_t  start_block, end_block;
++
++	err = ext4_snapshot_get_link(inode, &my_link);
++	if (err) {
++		SNAPSHOT_CONSOLE_ERR(err);
++		goto out_clear;
++	}
++	old_snap = ext4_snapshot_read_link(inode,
++					      &my_link, &old_link,
++					      false, &err);
++	if (err == 0 && old_snap == NULL) {
++		CERROR("invalid orig link link.old_ino == 0,"
++		       " inode=%lu\n", inode->i_ino);
++		err = -ENOENT;
++	}
++	if (err) {
++		SNAPSHOT_CONSOLE_ERR(err);
++		goto out_delete_link;
++	}
++	if (end > old_snap->i_size)
++		end = old_snap->i_size;
++	/* nothing to do if the range is empty once clamped to old_snap */
++	if (start >= end) {
++		iput(old_snap);
++		return 0;
++	}
++
++	start_block = start >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
++	end_block = (end + EXT4_BLOCK_SIZE(inode->i_sb) - 1)
++		>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
++
++	err = ext4_snapshot_truncate_blocks(handle, inode, old_snap,
++					       start_block, end_block);
++	iput(old_snap);
++	if (err) {
++		SNAPSHOT_CONSOLE_ERR(err);
++		return err;
++	}
++	return 0;
++
++out_delete_link:
++	credits = ext4_calc_snapshot_link_credits(inode,
++						     SNAPSHOT_CLEAR_LINK_OP);
++	if (!ext4_handle_has_enough_credits(handle, credits)) {
++		err = ext4_journal_extend(handle, credits);
++		if (err < 0) {
++			CERROR("couldn't extend journal inode=%lu "
++			       "handle=%p need=%d has=%d err=%d\n",
++			       inode->i_ino, handle, credits,
++			       handle->h_buffer_credits, err);
++			goto out_clear;
++		} else if (err != 0) {
++			err = ext4_journal_restart(handle, credits);
++			if (err) {
++				/*
++				 * This should never happen.
++				 * It may panic with ext4_journal_stop()
++				 * if ext4_journal_restart() fails.
++				 */
++				CERROR("couldn't extend and restart "
++				       "journal inode=%lu handle=%p "
++				       "need=%d err=%d\n",
++				       inode->i_ino, handle, credits, err);
++				goto out_clear;
++			}
++		}
++	}
++	ext4_snapshot_del_link(handle, inode);
++out_clear:
++	/* On error, turn the orig inode back into an ordinary inode and
++	 * return 0 so that the caller continues its truncate processing */
++	EXT4_CLEAR_OST_SNAPSHOT_FLAGS(inode);
++	ext4_mark_inode_dirty(handle, inode);
++	return 0;
++}
++EXPORT_SYMBOL(ext4_snapshot_punch);
++
++/* Copy the self FID from the "lma" trusted xattr of @inode into @fid.
++ * Returns 0, -ENOENT on an unexpected xattr size, or the xattr error. */
++static int ext4_snapshot_get_ostfid(struct inode *inode,
++				       struct lu_fid *fid)
++{
++	struct lustre_mdt_attrs lma;
++	int rc;
++
++	rc = ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED, "lma",
++			       (void *)&lma, sizeof(lma));
++	if (rc < 0)
++		return rc;
++	/* a short value, including an empty one, carries no usable FID */
++	if (rc != sizeof(lma))
++		return -ENOENT;
++	memcpy(fid, &(lma.lma_self_fid), sizeof(lma.lma_self_fid));
++	return 0;
++}
++
++/* Collect the FIDs of the DEL-flagged snapshots that directly follow @inode
++ * on the newer side of its link chain, then relink @inode past them. */
++int ext4_snapshot_get_orphan(struct inode *inode, void *fid_buf,
++				int *array_num)
++{
++	int err = 0, err2 = 0;
++	struct lu_fid *fid_array = fid_buf;
++	struct ext4_snapshot_link my_link, cur_link, next_link;
++	struct ext4_snapshot_link *cur_link_buf = NULL,
++		*next_link_buf = NULL, *tmp = NULL;
++	struct inode *new_snap = NULL, *cur = NULL;
++	int max_array = *array_num, num = 0;
++	unsigned long tmp_ino = 0;
++	bool relinked = false;	/* cur's on-disk link was rewritten */
++	handle_t *handle = NULL;
++
++	err = ext4_snapshot_get_link(inode, &my_link);
++	if (err) {
++		SNAPSHOT_CONSOLE_ERR(err);
++		return err;
++	}
++
++	/* @inode must have a newer link and no older one */
++	if (my_link.old_ino != 0 || my_link.new_ino == 0)
++		return -ENOENT;
++	new_snap = ext4_snapshot_read_link(inode,
++					      &my_link, &next_link,
++					      true, &err);
++	if (err) {
++		SNAPSHOT_CONSOLE_ERR(err);
++		return err;
++	}
++	if (!EXT4_TEST_OST_SNAPSHOT_DEL(new_snap)) {
++		iput(new_snap);
++		*array_num = 0;
++		return -ENOENT;
++	}
++
++	cur = new_snap;
++	next_link_buf = &cur_link;
++	cur_link_buf = &next_link;
++
++	while (cur && EXT4_TEST_OST_SNAPSHOT_DEL(cur)) {
++		new_snap = ext4_snapshot_read_link(cur, cur_link_buf,
++						      next_link_buf,
++						      true, &err);
++		if (err) {
++			iput(cur);
++			SNAPSHOT_CONSOLE_ERR(err);
++			return err;
++		}
++		if (num >= max_array) {
++			if (new_snap)
++				iput(new_snap);
++			err = -EAGAIN;
++			break;
++		}
++
++		err = ext4_snapshot_get_ostfid(cur, &fid_array[num]);
++		if (err) {
++			iput(cur);
++			if (new_snap)
++				iput(new_snap);
++			SNAPSHOT_CONSOLE_ERR(err);
++			return err;
++		}
++
++		num++;
++		iput(cur);
++		cur = new_snap;
++		tmp = cur_link_buf;
++		cur_link_buf = next_link_buf;
++		next_link_buf = tmp;
++	}
++
++	handle = ext4_journal_start(inode,
++				       ext4_calc_snapshot_link_credits(inode,
++						SNAPSHOT_DELETE_NEW_OP));
++	if (IS_ERR(handle)) {
++		err2 = PTR_ERR(handle);
++		SNAPSHOT_CONSOLE_ERR(err2);
++		CERROR("fail to journal start. inode=%lu err=%d\n",
++		       inode->i_ino, err2);
++		goto out_iput;
++	}
++
++	if (cur == NULL)
++		my_link.new_ino = 0;
++	else if (my_link.new_ino != cur->i_ino) {
++		my_link.new_ino = cur->i_ino;
++		tmp_ino = cur_link_buf->old_ino;	/* for rollback */
++		cur_link_buf->old_ino = inode->i_ino;
++		err2 = ext4_snapshot_set_link(handle, cur, cur_link_buf);
++		if (err2) {
++			SNAPSHOT_CONSOLE_ERR(err2);
++			goto out_stop;
++		}
++		relinked = true;
++	}
++	err2 = ext4_snapshot_set_link(handle, inode, &my_link);
++	/* rollback only what was actually rewritten above */
++	if (err2 && relinked) {
++		cur_link_buf->old_ino = tmp_ino;
++		ext4_snapshot_set_link(handle, cur, cur_link_buf);
++	}
++ out_stop:
++	ext4_journal_stop(handle);
++ out_iput:
++	*array_num = num;
++	if (cur)
++		iput(cur);
++
++	return err2 ? err2 : err;
++}
++EXPORT_SYMBOL(ext4_snapshot_get_orphan);
++
++static int ext4_snapshot_delete_link(handle_t *handle,
++					struct inode *inode,
++					struct inode *new_snap,
++					struct inode *old_snap,
++					struct ext4_snapshot_link *my_link,
++					struct ext4_snapshot_link *new_link,
++					struct ext4_snapshot_link *old_link,
++					void *orig_fid)
++{
++	int			err = 0;
++	unsigned long old_snap_new_ino = 0;
++
++	/* Does an older linked snapshot exist? Point it at my newer one. */
++	if (old_snap) {
++		/* for rollback */
++		old_snap_new_ino = old_link->new_ino;
++
++		/* update old snapshot link */
++		old_link->new_ino = my_link->new_ino;
++		err = ext4_snapshot_set_link(handle, old_snap, old_link);
++		if (err)
++			return err;
++	}
++
++	/* Does a newer linked inode exist? */
++	if (new_snap) {
++		if (EXT4_TEST_OST_SNAPSHOT_ORIG(new_snap)
++		    && my_link->old_ino == 0) {
++			EXT4_CLEAR_OST_SNAPSHOT_FLAGS(new_snap);
++
++			/* delete snapshot link */
++			err = ext4_snapshot_del_link(handle, new_snap);
++			if (err)
++				EXT4_SET_OST_SNAPSHOT_ORIG(new_snap);
++
++			ext4_mark_inode_dirty(handle, new_snap);
++			if (err == 0) {
++				err = ext4_snapshot_get_ostfid(new_snap,
++						  (struct lu_fid *)orig_fid);
++				if (err) {
++					SNAPSHOT_CONSOLE_ERR(err);
++				} else {
++					/* last snapshot deleted: return 1 */
++					err = 1;
++				}
++			}
++		} else {
++			new_link->old_ino = my_link->old_ino;
++			/* update newer snapshot link */
++			err = ext4_snapshot_set_link(handle,
++							new_snap, new_link);
++		}
++
++		if (err < 0) {
++			if (old_snap) {
++				/* rollback */
++				old_link->new_ino = old_snap_new_ino;
++				ext4_snapshot_set_link(handle,
++							  old_snap, old_link);
++			} else {
++				/* When I'm oldest snapshot, force delete. */
++				err = 0;
++			}
++			return err;
++		}
++	}
++	/* Even if clearing my own link fails, do not roll back,
++	 * because this node is about to be deleted anyway.
++	 */
++	my_link->old_ino = my_link->new_ino = 0;
++	ext4_snapshot_set_link(handle, inode, my_link);
++
++	return err;
++}
++
++int ext4_snapshot_destroy(struct inode *inode, void *orig_fid)
++{
++	struct ext4_snapshot_link my_link, new_link, old_link;
++	struct inode *old_snap = NULL, *new_snap = NULL;
++	handle_t *handle;
++	int err, credits;
++
++	err = ext4_snapshot_get_link(inode, &my_link);
++	if (err) {
++		SNAPSHOT_CONSOLE_ERR(err);
++		return err;
++	}
++	/* an unreadable neighbour link is not fatal: it is treated as absent */
++	new_snap = ext4_snapshot_read_link(inode,
++					      &my_link, &new_link,
++					      true, &err);
++	if (err) {
++		SNAPSHOT_CONSOLE_ERR(err);
++		err = 0;
++		my_link.new_ino = 0;
++	}
++
++	old_snap = ext4_snapshot_read_link(inode,
++					      &my_link, &old_link,
++					      false, &err);
++	if (err) {
++		SNAPSHOT_CONSOLE_ERR(err);
++		err = 0;
++		my_link.old_ino = 0;
++	}
++
++	credits = ext4_calc_snapshot_link_credits(inode, SNAPSHOT_DELETE_OP);
++
++	/* Blocks are copied below only when an older snapshot exists;
++	 * for the oldest snapshot there is nothing to copy */
++	handle = ext4_journal_start(inode, credits);
++	if (IS_ERR(handle)) {
++		err = PTR_ERR(handle);
++		SNAPSHOT_CONSOLE_ERR(err);
++		CERROR("fail to journal start. inode=%lu err=%d\n",
++		       inode->i_ino, err);
++		goto out_err;
++	}
++
++	if (old_snap) {
++		ext4_lblk_t  end_block;
++		__u64 end = inode->i_size;
++		if (end > old_snap->i_size)
++			end = old_snap->i_size;
++
++		if (end == 0)
++			goto skip_copy;
++
++		end_block = (end + EXT4_BLOCK_SIZE(inode->i_sb) - 1)
++			>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
++
++		err = ext4_snapshot_copy_blocks(handle, inode, old_snap,
++						   0, end_block, false);
++		if (err) {
++			SNAPSHOT_CONSOLE_ERR(err);
++			goto out_stop;
++		}
++	}
++
++	if (!ext4_handle_has_enough_credits(handle, credits)) {
++		err = ext4_journal_extend(handle, credits);
++		if (err < 0) {
++			CERROR("couldn't extend journal inode=%lu "
++			       "handle=%p need=%d has=%d err=%d\n",
++			       inode->i_ino, handle, credits,
++			       handle->h_buffer_credits, err);
++			goto out_stop;
++		} else if (err != 0) {
++			err = ext4_journal_restart(handle, credits);
++			if (err) {
++				/*
++				 * This should never happen.
++				 * It may panic with ext4_journal_stop()
++				 * if ext4_journal_restart() fails.
++				 */
++				CERROR("couldn't extend and restart "
++				       "journal inode=%lu handle=%p "
++				       "need=%d err=%d\n",
++				       inode->i_ino, handle, credits, err);
++				goto out_stop;
++			}
++		}
++	}
++ skip_copy:
++	err = ext4_snapshot_delete_link(handle, inode, new_snap, old_snap,
++					   &my_link, &new_link, &old_link,
++					   orig_fid);
++
++	SNAPSHOT_CONSOLE_ERR(err);
++
++ out_stop:
++	ext4_journal_stop(handle);
++ out_err:
++	if (new_snap)
++		iput(new_snap);
++	if (old_snap)
++		iput(old_snap);
++	return err;
++}
++EXPORT_SYMBOL(ext4_snapshot_destroy);
++
++/* List @inode (which must be DEL-flagged) and the snapshots linked on its
++ * older side into @buf; with a NULL @buf only probe for one (-EEXIST). */
++int ext4_snapshot_get_old_list(struct inode *inode, void *buf)
++{
++	struct lu_buf *bufp = (struct lu_buf *)buf;
++	struct snapshot_list_buf *list_buf = NULL;
++	struct inode   *cur = NULL, *next_inode = NULL;
++	struct ext4_snapshot_gen_lock *lock = NULL;
++	struct ext4_snapshot_link    cur_link, next_link;
++	struct ext4_snapshot_link *cur_link_buf = NULL,
++		*next_link_buf = NULL, *tmp = NULL;
++	int i = 0, rc = 0;
++
++	if (bufp) {
++		if (bufp->lb_len < sizeof(struct snapshot_list_buf)) {
++			CERROR("invalid snapshot_list_buf\n");
++			return -EFAULT;
++		}
++		list_buf = (struct snapshot_list_buf *)bufp->lb_buf;
++	}
++	rc = ext4_snapshot_lock(inode, false, &lock);
++	if (rc) {
++		CERROR("fail to lock snapshot. err=%d\n", rc);
++		return rc;
++	}
++	if (!lock || !EXT4_TEST_OST_SNAPSHOT_DEL(inode)) {
++		CERROR("inode is not orphan snapshot ino=%ld\n",
++		       inode->i_ino);
++		rc = -ENOENT;
++		goto out;
++	}
++	rc = ext4_snapshot_get_link(inode, &cur_link);
++	if (rc)
++		goto out;
++
++	if (list_buf) {
++		list_buf->list_data[i].ost_ino = inode->i_ino;
++		list_buf->list_data[i].fid.f_seq = 0;
++	}
++
++	next_link_buf = &next_link;
++	cur_link_buf = &cur_link;
++	cur = inode;
++	i++;
++
++	while (cur_link_buf->old_ino) {
++		next_inode = ext4_snapshot_read_link(cur, cur_link_buf,
++						next_link_buf, false, &rc);
++		if (rc) {
++			if (rc == -EXDEV &&
++			    EXT4_TEST_OST_SNAPSHOT_DEL(cur))
++				CWARN("orphan inode has invalid old snapshot link. "
++				      "probably failed to delete inode "
++				      "after being removed from snapshot link. "
++				      "inode=%lu\n", cur->i_ino);
++			if (list_buf && cur != inode &&
++			    i < IOC_SNAPSHOT_LIST_MAX) {
++				/* if cur == inode, do not set the command to
++				 * error in order to make inode deleteable.
++				 */
++				list_buf->list_data[i].fid.f_oid = -rc;
++				list_buf->list_data[i].ost_ino = 0;
++				i++;
++			}
++			CERROR("invalid old link err=%d inode=%lu\n",
++			       rc, cur->i_ino);
++			rc = 0;
++			break;
++		}
++		/* done with the previous inode; @inode itself is caller-owned */
++		if (cur != inode)
++			iput(cur);
++		cur = next_inode;
++		tmp = cur_link_buf;
++		cur_link_buf = next_link_buf;
++		next_link_buf = tmp;
++
++		if (!list_buf) {
++			rc = -EEXIST;
++			break;
++		}
++		if (i >= IOC_SNAPSHOT_LIST_MAX) {
++			rc = -EAGAIN;
++			break;
++		}
++
++		list_buf->list_data[i].ost_ino = cur->i_ino;
++		if (EXT4_TEST_OST_SNAPSHOT_DEL(cur)) {
++			list_buf->list_data[i].fid.f_seq = 0;
++			list_buf->list_data[i].fid.f_oid = 0;
++		} else {
++			struct lu_fid   ff;	/* pack mdt_fid */
++			rc = ext4_xattr_get(next_inode, EXT4_XATTR_INDEX_TRUSTED,
++					       "fid", (void *)&ff, sizeof(ff));
++			if (rc == sizeof(ff)) {
++				memcpy(&list_buf->list_data[i].fid, &ff,
++				       sizeof(ff));
++				/* Currently, the f_ver is not the real parent
++				 * MDT-object's FID::f_ver, instead it
++				 * is the OST-object index in its
++				 * parent MDT-object's layout EA. */
++				list_buf->list_data[i].fid.f_ver = 0;
++			} else {
++				list_buf->list_data[i].fid.f_seq = 0;
++				if (rc > 0)
++					list_buf->list_data[i].fid.f_oid = ENOENT;
++				else
++					list_buf->list_data[i].fid.f_oid = -rc;
++			}
++			rc = 0;
++		}
++		i++;
++	}
++	if (cur != inode)
++		iput(cur);
++
++	if (list_buf)
++		list_buf->list_num = i;
++out:
++	if (lock)
++		ext4_snapshot_unlock(lock);
++	return rc;
++}
++EXPORT_SYMBOL(ext4_snapshot_get_old_list);
++
++int ext4_snapshot_list_orphan(struct super_block *sb,
++				 void *buf)
++{
++	struct lu_buf *bufp = (struct lu_buf *)buf;
++	struct snapshot_list_buf *list_buf;
++	struct buffer_head *inode_bitmap_bh = NULL;
++	ext4_group_t ngroups;
++	unsigned long ino = 0;
++	struct inode *inode;
++	ext4_group_t i;
++	int j = 0, rc = 0;
++
++	if (!bufp || bufp->lb_len < sizeof(struct snapshot_list_buf)) {
++		CERROR("invalid snapshot_list_buf\n");
++		return -EFAULT;
++	}
++	list_buf = (struct snapshot_list_buf *)bufp->lb_buf;
++	/* scan the inode bitmap of every group for DEL-flagged inodes */
++	ngroups = ext4_get_groups_count(sb);
++	for (i = 0, j = 0; i < ngroups; i++, ino = 0) {
++		inode_bitmap_bh = ext4_read_inode_bitmap(sb, i);
++		if (!inode_bitmap_bh) {
++			CERROR("fail to get inode_bitmap group=%u\n",
++			       i);
++			return -EIO;
++		}
++repeat_in_this_group:
++		ino = ext4_find_next_bit((unsigned long *)
++					    inode_bitmap_bh->b_data,
++					    EXT4_INODES_PER_GROUP(sb), ino);
++		/* bit index -> 1-based inode offset inside this group */
++		if (++ino > EXT4_INODES_PER_GROUP(sb)) {
++			brelse(inode_bitmap_bh);
++			continue;
++		}
++
++		inode = ext4_iget(sb,
++				     ino + (EXT4_INODES_PER_GROUP(sb) * i));
++		if (!IS_ERR(inode)) {
++			if (EXT4_TEST_OST_SNAPSHOT_DEL(inode)) {
++				if (j >= IOC_SNAPSHOT_LIST_MAX) {
++					rc = -EAGAIN;
++					brelse(inode_bitmap_bh);
++					iput(inode);
++					break;
++				}
++				list_buf->list_data[j].ost_ino = inode->i_ino;
++				/* pack ost_fid */
++				rc = ext4_snapshot_get_ostfid(inode,
++						&list_buf->list_data[j].fid);
++				if (rc) {
++					list_buf->list_data[j].fid.f_seq = 0;
++					list_buf->list_data[j].fid.f_oid = -rc;
++					rc = 0;
++				}
++				j++;
++			}
++			iput(inode);
++		} else {
++			CDEBUG(D_INFO, "failed to get inode ino=%lu\n", ino);
++		}
++		goto repeat_in_this_group;
++	}
++	list_buf->list_num = j;
++	return rc;
++}
++EXPORT_SYMBOL(ext4_snapshot_list_orphan);
++
++/*
++ * ext4_snapshot_init()
++ *
++ * initialize snapshot at module loading
++ *
++ * \param[in]	none
++ *
++ * \retval	none
++ */
++void ext4_snapshot_init(void)
++{
++	/* initialize the mutex that guards the snapshot lock list */
++	mutex_init(&snap_list_mutex);
++
++	/* start with an empty snapshot lock list */
++	INIT_LIST_HEAD(&snap_lock_list);
++
++	return;
++}
++
++/*
++ * ext4_snapshot_exit()
++ *
++ * finalize snapshot at module removing
++ *
++ * \param[in]	none
++ *
++ * \retval	none
++ */
++void ext4_snapshot_exit(void)
++{
++	struct ext4_snapshot_gen_lock	*entry, *tmp;
++
++	/* lock snapshot lock list */
++	mutex_lock(&snap_list_mutex);
++	/* delete & free all snapshot generation lock objects from the list */
++	list_for_each_entry_safe(entry, tmp, &snap_lock_list, list) {
++		/* delete lock object from list */
++		list_del(&entry->list);
++		/* free snapshot lock object */
++		kfree(entry);
++	}
++	/* unlock snapshot lock list */
++	mutex_unlock(&snap_list_mutex);
++	/* destroy snapshot list lock object */
++	mutex_destroy(&snap_list_mutex);
++	return;
++}
+diff -urN -x .svn linux-stage.org/fs/ext4/snapshot.h linux-stage/fs/ext4/snapshot.h
+--- linux-stage.org/fs/ext4/snapshot.h	1970-01-01 09:00:00.000000000 +0900
++++ linux-stage/fs/ext4/snapshot.h	2018-11-01 12:13:30.000000000 +0900
+@@ -0,0 +1,368 @@
++/*
++ * GPL HEADER START
++ *
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 only,
++ * as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License version 2 for more details.  A copy is
++ * included in the COPYING file that accompanied this code.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++ *
++ * GPL HEADER END
++ */
++/*
++ *   Copyright(c) 2016-2018 FUJITSU LIMITED.
++ *   All rights reserved.
++ */
++
++#ifndef _EXT4_SNAPSHOT_H
++#define _EXT4_SNAPSHOT_H
++
++#include "ext4_extents.h"
++#include "xattr.h"
++#define	DEBUG_SNAPSHOT
++
++#ifdef DEBUG_SNAPSHOT
++extern void ext4_show_snapshot_link(struct inode *inode,
++					const char *msg);
++extern void ext4_show_snapshot_blocks(struct inode *inode,
++					const char *msg, int flag);
++#else
++#define	ext4_show_snapshot_link(inode, msg)
++#define	ext4_show_snapshot_blocks(inode, msg, flag)
++#endif
++
++
++#define EXT4_XATTR_NAME_SNAPSHOT_LINK	"snapshot_link"
++
++/* snapshot flags in ext4.
++ * other snapshot flags are defined in
++ * lustre/include/lustre_snapshot.h */
++#define SNAPSHOT_FLAGS_MASK						\
++	(EXT4_SNAPSHOT_FL | EXT4_SNAPSHOT_SP_FL | EXT4_SNAPSHOT_SHARE_FL)
++
++#define SNAPSHOT_FLAGS_NODELMASK		\
++	(EXT4_SNAPSHOT_SP_FL | EXT4_SNAPSHOT_SHARE_FL)
++
++#define OST_SNAPSHOT_ORIG_PATT	EXT4_SNAPSHOT_SHARE_FL
++#define OST_SNAPSHOT_FILE_PATT \
++	(EXT4_SNAPSHOT_SHARE_FL | EXT4_SNAPSHOT_SP_FL)
++#define OST_SNAPSHOT_DEL_PATT \
++	(EXT4_SNAPSHOT_SHARE_FL | EXT4_SNAPSHOT_FL)
++
++#define EXT4_TEST_OST_SNAPSHOT_FILE(inode)	\
++	((EXT4_I(inode)->i_flags & SNAPSHOT_FLAGS_NODELMASK) == \
++						OST_SNAPSHOT_FILE_PATT)
++
++#define EXT4_TEST_OST_SNAPSHOT_ORIG(inode)	\
++	((EXT4_I(inode)->i_flags & SNAPSHOT_FLAGS_NODELMASK) == \
++						OST_SNAPSHOT_ORIG_PATT)
++
++/* test if  OST_SNAPSHOT_FILE or OST_SNAPSHOT_ORIG */
++#define EXT4_TEST_OST_SNAPSHOT(inode)		\
++	((EXT4_I(inode)->i_flags & EXT4_SNAPSHOT_SHARE_FL) != 0)
++
++#define EXT4_TEST_OST_SNAPSHOT_DEL(inode)	\
++	((EXT4_I(inode)->i_flags & OST_SNAPSHOT_DEL_PATT) == \
++						OST_SNAPSHOT_DEL_PATT)
++
++#define EXT4_SET_OST_SNAPSHOT_ORIG(inode)				\
++do {									\
++	ext4_clear_inode_flag(inode, EXT4_INODE_SNAPSHOT);	\
++	ext4_clear_inode_flag(inode, EXT4_INODE_SNAPSHOT_SP);	\
++	ext4_set_inode_flag(inode, EXT4_INODE_SNAPSHOT_SHARE);	\
++	ext4_set_inode_flags(inode);					\
++} while (0)
++
++#define EXT4_SET_OST_SNAPSHOT_FILE(inode)				\
++do {									\
++	ext4_clear_inode_flag(inode, EXT4_INODE_SNAPSHOT);	\
++	ext4_set_inode_flag(inode, EXT4_INODE_SNAPSHOT_SP);	\
++	ext4_set_inode_flag(inode, EXT4_INODE_SNAPSHOT_SHARE);	\
++	ext4_set_inode_flags(inode);					\
++} while (0)
++
++#define EXT4_CLEAR_OST_SNAPSHOT_FLAGS(inode)				\
++do {									\
++	ext4_clear_inode_flag(inode, EXT4_INODE_SNAPSHOT);	\
++	ext4_clear_inode_flag(inode, EXT4_INODE_SNAPSHOT_SP);	\
++	ext4_clear_inode_flag(inode, EXT4_INODE_SNAPSHOT_SHARE);	\
++	ext4_set_inode_flags(inode);					\
++} while (0)
++
++#define EXT4_SNAPSHOT_SET_SPARSE_EXTENT(ext)				\
++do {									\
++	ext4_ext_store_pblock(ext, 0);				\
++	ext4_ext_mark_uninitialized(ext);				\
++} while (0)
++
++#define EXT4_SNAPSHOT_TEST_SPARSE_EXTENT(ext)			\
++	((ext4_ext_pblock(ext) == 0)					\
++	 && ext4_ext_is_uninitialized(ext))
++
++#define	SNAPSHOT_LINK_SIZE		(sizeof(struct ext4_snapshot_link))
++
++/* snapshot error code in ext4.
++ * other snapshot error codes are defined in
++ * lustre/include/lustre_snapshot.h */
++#define	SNAPSHOT_ERR_NOMEM		2450
++#define	SNAPSHOT_ERR_NOSPC		2451
++#define	SNAPSHOT_ERR_SYSERR		2452
++
++#define SNAPSHOT_ERR_MSG1		"Snapshot cannot allocate memory.\n"
++#define SNAPSHOT_ERR_MSG2		"Snapshot no disk space left.\n"
++#define SNAPSHOT_ERR_MSG3		"Snapshot system error. " \
++					"func=%s route=%d code=%d\n"
++
++
++#define SNAPSHOT_CONSOLE_ERR(err)
++
++enum {
++	SNAPSHOT_CREATE_OP,
++	SNAPSHOT_DELETE_OP,
++	SNAPSHOT_DELETE_NEW_OP,
++	SNAPSHOT_CLEAR_LINK_OP,
++	SNAPSHOT_OP_MAX
++};
++
++/* snapshot lock wait timeout [sec] */
++#define	SNAPSHOT_LOCK_TIMEOUT		200
++/* snapshot lock wait frequency count (1000 / delay[ms] : delay=10ms) */
++#define	SNAPSHOT_LOCK_FREQ		100
++
++/* define snapshot unlink state */
++#define SNAPSHOT_DELETE_BLOCK		0
++#define SNAPSHOT_DELETE_FILE		1
++#define SNAPSHOT_DELETE_ERROR		2
++
++#define SNAPSHOT_MATCH_TS(t1, t2)		\
++	(((t1)->tv_sec == (t2)->tv_sec) &&	\
++	 ((t1)->tv_nsec == (t2)->tv_nsec))
++#define SNAPSHOT_CLTIME(t) \
++	((((__u64)(t)->tv_sec) << 30) + (t)->tv_nsec)
++
++
++#ifndef _EXT4_DEBUG_H_
++#define _EXT4_DEBUG_H_
++
++#ifdef EXT4_DEBUG
++#define D_TRACE       0x00000001 /* ENTRY/EXIT markers */
++
++#define CDEBUG(mask, format, a...)                                      \
++	printk("<5>Lustre: %d:%d:(%s:%d:%s()) " format,                 \
++		0, 0, __FILE__, __LINE__, __FUNCTION__, ## a);
++
++#define GOTO(label)                                                     \
++do {                                                                    \
++	CDEBUG(D_TRACE, "Process going to %s\n", #label);               \
++	goto label;                                                     \
++} while (0)
++
++#define GOTO_ERROR(label, err)                                          \
++do {                                                                    \
++	long GOTO__err = (long)(err);                                   \
++	CDEBUG(D_TRACE, "Process leaving via %s (err=%lu : %ld : %lx)\n",\
++		#label, (unsigned long)GOTO__err, (signed long)GOTO__err,\
++		(signed long)GOTO__err);                                 \
++	goto label;                                                     \
++} while (0)
++
++#define RETURN(rc)                                                      \
++do {                                                                    \
++	typeof(rc) RETURN__ret = (rc);                                  \
++	CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n",       \
++		(long)RETURN__ret, (long)RETURN__ret, (long)RETURN__ret);\
++	return RETURN__ret;                                             \
++} while (0)
++
++#define RETURN_ERROR(rc, err)                                           \
++do {                                                                    \
++	typeof(rc) RETURN__ret = (rc);                                  \
++	typeof(err) RETURN__err = (err);                                \
++	CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx) (err=%lu : %ld : %lx)\n",         \
++		(long)RETURN__ret, (long)RETURN__ret, (long)RETURN__ret,                         \
++		(long)RETURN__err, (long)RETURN__err, (long)RETURN__err);                        \
++	return RETURN__ret;                                                                     \
++} while (0)
++
++#define ENTRY  CDEBUG(D_TRACE, "Process entered\n");
++
++#define EXIT                                                            \
++do {                                                                    \
++	CDEBUG(D_TRACE, "Process leaving\n");                           \
++	EXIT_NESTING;                                                   \
++} while (0)
++
++#define LOG(fmt, ...) printk("<5>EXT4 %s %d: " fmt, __FUNCTION__, __LINE__, ##__VA_ARGS__);
++
++#else /* !EXT4_DEBUG */
++#include <libcfs/libcfs.h>
++#define LOG(fmt, ...)
++#endif
++
++#endif
++
++
++/* snapshot generation link; stored as-is in the "snapshot_link" xattr */
++struct ext4_snapshot_link {
++	unsigned long	new_ino;	/* newer snapshot inode no, 0 if none */
++	unsigned long	old_ino;	/* older snapshot inode no, 0 if none */
++	unsigned long	org_ino;	/* original file inode no */
++	struct timespec	org_ts;		/* original file timestamp */
++};
++
++/* snapshot generation lock list */
++struct ext4_snapshot_gen_lock {
++	unsigned long		org;	/* original file inode no */
++	struct timespec		ts;	/* original file timestamp */
++	struct list_head	list;	/* list_head structure */
++};
++
++/* moved from lustre/osd-ext4/osd_io.c */
++struct bpointers {
++	unsigned long *blocks;
++	unsigned long start;
++	int num;
++	int init_num;
++	int create;
++};
++
++
++extern int ext4_get_snapshot_lock_timeout(void);
++extern void ext4_set_snapshot_lock_timeout(int);
++extern int ext4_snapshot_lock(struct inode *, bool,
++				 struct ext4_snapshot_gen_lock **);
++extern void ext4_snapshot_unlock(struct ext4_snapshot_gen_lock *);
++extern struct inode *ext4_snapshot_read_link(struct inode *,
++						struct ext4_snapshot_link *,
++						struct ext4_snapshot_link *,
++						bool, int*);
++extern int ext4_snapshot_copy_blocks(handle_t *, struct inode *,
++					struct inode *,
++					ext4_lblk_t,
++					ext4_lblk_t, bool);
++extern int ext4_snapshot_truncate_blocks(handle_t *, struct inode *,
++					    struct inode *,
++					    ext4_lblk_t,
++					    ext4_lblk_t);
++extern int ext4_snapshot_punch(handle_t *, struct inode *,
++				   __u64, __u64);
++extern void ext4_snapshot_init(void);
++extern void ext4_snapshot_exit(void);
++
++static inline int ext4_calc_snapshot_link_credits(struct inode *inode,
++						     int op)
++{
++	int credits = 0;
++	struct super_block *sb = inode->i_sb;
++	/* Journal credits for one ext4_xattr_set() of the snapshot link;
++	 * each operation below multiplies this by its number of updates */
++	credits = EXT4_DATA_TRANS_BLOCKS(sb);
++	if ((SNAPSHOT_LINK_SIZE >= EXT4_XATTR_MIN_LARGE_EA_SIZE(sb->s_blocksize)) &&
++	    EXT4_HAS_INCOMPAT_FEATURE(sb,
++					 EXT4_FEATURE_INCOMPAT_EA_INODE)) {
++		int nrblocks = (SNAPSHOT_LINK_SIZE + sb->s_blocksize - 1) >>
++			sb->s_blocksize_bits;
++		/* For new inode */
++		credits += EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + 3;
++		/* For data blocks of EA inode */
++		credits += ext4_meta_trans_blocks(inode, nrblocks, 0);
++	}
++	switch (op) {
++	case SNAPSHOT_CREATE_OP:
++		/* one link update each:
++		 * 1 : for create original link
++		 * 1 : for create new snapshot link
++		 * 1 : for create old snapshot link
++		 * 1 : for delete original link (rollback)
++		 * 1 : for delete new snapshot link (rollback)
++		 * 1 : for delete old snapshot link (rollback) */
++		credits *= 6;
++		return credits;
++
++	case SNAPSHOT_DELETE_OP:
++		/* one link update each:
++		 * 1 : for update target snapshot link
++		 * 1 : for update newer snapshot link
++		 * 1 : for update older snapshot link
++		 * 1 : for update newer snapshot link (rollback) */
++		credits *= 4;
++		return credits;
++
++	case SNAPSHOT_DELETE_NEW_OP:
++		/* one link update each:
++		 * 1 : for update target snapshot link
++		 * 1 : for update newer snapshot link */
++		credits *= 2;
++		return credits;
++	case SNAPSHOT_CLEAR_LINK_OP:
++		/* a single link update:
++		 * 1 : for update target snapshot link */
++		return credits;
++	default:
++		CERROR("invalid opc=%d\n", op);
++		return 0;
++	}
++}
++
++/* Store @link as the "snapshot_link" trusted xattr of @inode. */
++static inline int ext4_snapshot_set_link(handle_t *handle,
++					    struct inode *inode,
++					    struct ext4_snapshot_link *link)
++{
++	int rc = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_TRUSTED,
++				       EXT4_XATTR_NAME_SNAPSHOT_LINK,
++				       (void *)link, SNAPSHOT_LINK_SIZE, 0);
++
++	if (rc != 0)
++		CERROR("fail to set snapshot link. "
++		       "inode=%lu err=%d\n", inode->i_ino, rc);
++	return rc;
++}
++
++/* Read the "snapshot_link" trusted xattr of @inode into @link.
++ * Returns 0, -ENOLINK on an unexpected stored size, or the xattr error. */
++static inline int ext4_snapshot_get_link(struct inode *inode,
++					    struct ext4_snapshot_link *link)
++{
++	int rc;
++
++	if (!link)
++		BUG();
++
++	rc = ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED,
++			       EXT4_XATTR_NAME_SNAPSHOT_LINK,
++			       (void *)link, SNAPSHOT_LINK_SIZE);
++	if (rc == SNAPSHOT_LINK_SIZE)
++		return 0;
++	/* any other non-negative size is reported as a missing link */
++	if (rc >= 0)
++		rc = -ENOLINK;
++	CDEBUG(D_ERROR, "fail to get snapshot link. "
++	       "inode=%lu err=%d\n", inode->i_ino, rc);
++	return rc;
++}
++
++/* Remove the "snapshot_link" trusted xattr of @inode (NULL value). */
++static inline int ext4_snapshot_del_link(handle_t *handle,
++					    struct inode *inode)
++{
++	int rc = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_TRUSTED,
++				       EXT4_XATTR_NAME_SNAPSHOT_LINK,
++				       NULL, 0, 0);
++
++	if (rc != 0)
++		CERROR("fail to delete snapshot link. "
++		       "inode=%lu err=%d\n", inode->i_ino, rc);
++	return rc;
++}
++#endif	/* _EXT4_SNAPSHOT_H */
+diff -urN -x .svn linux-stage.org/fs/ext4/snapshot_debug.h linux-stage/fs/ext4/snapshot_debug.h
+--- linux-stage.org/fs/ext4/snapshot_debug.h	1970-01-01 09:00:00.000000000 +0900
++++ linux-stage/fs/ext4/snapshot_debug.h	2018-09-03 14:15:30.000000000 +0900
+@@ -0,0 +1,272 @@
++/*
++ * GPL HEADER START
++ *
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 only,
++ * as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License version 2 for more details.  A copy is
++ * included in the COPYING file that accompanied this code.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++ *
++ * GPL HEADER END
++ */
++/*
++ *   Copyright(c) 2016-2018 FUJITSU LIMITED.
++ *   All rights reserved.
++ */
++
++#ifndef _SNAPSHOT_DEBUG_H
++#define _SNAPSHOT_DEBUG_H
++
++#ifdef DEBUG_SNAPSHOT
++/*
++ * ext4_show_snapshot_link()
++ *
++ * show snapshot link list to syslog
++ *
++ * \param[in]	inode		target inode
++ * \param[in]	msg		title message
++ *
++ * \retval	none
++ */
++void ext4_show_snapshot_link(struct inode *inode, const char *msg)
++{
++	struct inode	*next_inode, *held = NULL; /* held: iget ref we own */
++	int		no, err;
++	struct ext4_snapshot_link	link;
++
++	if (!EXT4_TEST_OST_SNAPSHOT(inode))
++		return;
++
++	printk(KERN_ERR "----- snapshot link summary [%-10s] "
++			"------------------\n", msg);
++	/* find newest snapshot linked inode */
++	next_inode = inode;
++	err = ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED,
++			EXT4_XATTR_NAME_SNAPSHOT_LINK,
++			(void *)&link, sizeof(link));
++	if (err < 0) {
++		printk(KERN_ERR "SNAPSHOT: ERROR: Cannot find xattr "
++				"ino=%ld\n", inode->i_ino);
++		return;
++	}
++	while (link.new_ino) {
++		next_inode = ext4_iget(inode->i_sb, link.new_ino);
++		if (IS_ERR(next_inode)) {
++			printk(KERN_ERR "SNAPSHOT: ERROR: Cannot find inode "
++			       "ino=%ld\n", link.new_ino);
++			goto out;
++		}
++		/* stays referenced: the SNAP00 line below still reads it */
++		iput(held);	/* iput(NULL) is a no-op */
++		held = next_inode;
++		err = ext4_xattr_get(next_inode, EXT4_XATTR_INDEX_TRUSTED,
++				EXT4_XATTR_NAME_SNAPSHOT_LINK,
++				(void *)&link, sizeof(link));
++		if (err < 0) {
++			printk(KERN_ERR "SNAPSHOT: ERROR: Cannot find xattr "
++					"ino=%ld\n", next_inode->i_ino);
++			goto out;
++		}
++	}
++	/* show snapshot link info foreach */
++	no = 0;
++	printk(KERN_ERR "SNAPSHOT: SNAP%02d ino=%lu flag=%08X new=%lu "
++			"old=%lu\n", no, next_inode->i_ino,
++			next_inode->i_flags, link.new_ino, link.old_ino);
++
++	while (link.old_ino) {
++		bool me = (link.old_ino == inode->i_ino);
++		if (no > 10) {
++			printk(KERN_ERR "SNAPSHOT: ERROR: Too many inodes "
++					"of snapshot link\n");
++			break;
++		}
++		iput(held);
++		held = NULL;
++		if (!me) {
++			next_inode = ext4_iget(inode->i_sb, link.old_ino);
++			if (IS_ERR(next_inode)) {
++				printk(KERN_ERR "SNAPSHOT: ERROR: Cannot find inode "
++				       " ino=%ld\n", link.old_ino);
++				goto out;
++			}
++			held = next_inode;
++		} else {
++			next_inode = inode;
++		}
++		err = ext4_xattr_get(next_inode, EXT4_XATTR_INDEX_TRUSTED,
++				EXT4_XATTR_NAME_SNAPSHOT_LINK,
++				(void *)&link, sizeof(link));
++		if (err < 0) {
++			printk(KERN_ERR "SNAPSHOT: ERROR: Cannot find xattr "
++					"ino=%ld\n", next_inode->i_ino);
++			goto out;
++		}
++		no++;
++		printk(KERN_ERR "SNAPSHOT:%sSNAP%02d ino=%lu flag=%08X "
++		       "new=%lu old=%lu\n", (me ? "*" : " "), no,
++		       next_inode->i_ino, next_inode->i_flags,
++		       link.new_ino, link.old_ino);
++	}
++	printk(KERN_ERR "--------------------------------------------------"
++			"---------\n");
++out:
++	iput(held);
++}
++EXPORT_SYMBOL(ext4_show_snapshot_link);
++
++/*
++ * ext4_show_inode_blocks()
++ *
++ * show specified inode extent blocks summary to syslog
++ *
++ * \param[in]	inode		target inode
++ *
++ * \retval	none
++ */
++static void ext4_show_inode_blocks(struct inode *inode)
++{
++	struct ext4_extent	*ex = NULL;
++	struct ext4_ext_path	*p, *path = NULL;
++	ext4_fsblk_t		pblock;
++	ext4_lblk_t		block, ee_block, max;
++	unsigned short		ee_len;
++	int			line;
++
++	/* max : logical block no (to) */
++	max = (inode->i_size + EXT4_BLOCK_SIZE(inode->i_sb) - 1)
++			>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
++	printk(KERN_ERR "SNAPSHOT: ino=%lu i_blocks=%lu i_size=%llu "
++			"max=%u\n", inode->i_ino, inode->i_blocks,
++			inode->i_size, max);
++
++	/* repeat until block num */
++	for (block = line = 0; ((block < max) && (line < 30)); line++) {
++		/* release the previous lookup's buffers before reusing path */
++		if (path)
++			ext4_ext_drop_refs(path);
++		p = ext4_ext_find_extent(inode, block, path);
++		if (IS_ERR(p)) {
++			printk(KERN_ERR "SNAPSHOT: fail to get extent: "
++				"lblk=%u, err=%ld\n", block, PTR_ERR(p));
++			break;
++		}
++		path = p;
++		/* get physical block no of compare inode */
++		ex = path[ext_depth(inode)].p_ext;
++		if (!ex) {
++			printk(KERN_ERR "SNAPSHOT: ino=%lu No extent\n",
++			       inode->i_ino);
++			break;
++		}
++
++		/* get block num of compare extent */
++		ee_block = le32_to_cpu(ex->ee_block);
++		ee_len = ext4_ext_get_actual_len(ex);
++		if (block >= ee_block + ee_len) {
++			block++;
++			continue;
++		}
++
++		block = ee_block + ee_len;
++		pblock = ext4_ext_pblock(ex);
++		printk(KERN_ERR "SNAPSHOT: ino=%lu "
++				"block[%u - %u] pblock=[%Lu - %Lu]\n",
++				inode->i_ino, ee_block, block - 1,
++				pblock, pblock + ee_len - 1);
++	}
++	if (path) {
++		ext4_ext_drop_refs(path);
++		kfree(path);
++	}
++
++	if ((block < max) && ex)
++		printk(KERN_ERR "SNAPSHOT: ino=%lu ... since too many "
++				"blocks, interrupted.\n", inode->i_ino);
++	return;
++}
++
++/*
++ * ext4_show_snapshot_blocks()
++ *
++ * show the extent block summary of the specified inode and, if requested,
++ * of its older/newer snapshot inodes in syslog
++ *
++ * \param[in]	inode		target inode
++ * \param[in]	msg		title message
++ * \param[in]	flag		show snapshot blocks flag 0=disable 1=enable
++ * \retval	none
++ */
++void ext4_show_snapshot_blocks(struct inode *inode, const char *msg,
++	int flag)
++{
++	struct inode		*next_inode;
++	struct ext4_snapshot_link	link;
++	int			err;
++
++	/* check if target inode is snapshot file */
++	if (!EXT4_TEST_OST_SNAPSHOT(inode))
++		return;
++
++	printk(KERN_ERR "===== inode blocks summary [%-10s] "
++			"===================\n", msg);
++	/* show specified inode blocks */
++	ext4_show_inode_blocks(inode);
++
++	/* show nearby snapshot blocks too? */
++	if (!flag)
++		goto out;
++
++	err = ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED,
++			EXT4_XATTR_NAME_SNAPSHOT_LINK,
++			(void *)&link, sizeof(link));
++	if (err < 0)
++		goto out;
++
++	/* show older snapshot inode blocks, if exist */
++	if (link.old_ino) {
++		/* get compare inode */
++		next_inode = ext4_iget(inode->i_sb, link.old_ino);
++		if (IS_ERR(next_inode)) {
++			/* compare inode does not exist */
++			goto skip;
++		}
++		printk(KERN_ERR "----- old snapshot ----------------------"
++				"-------------\n");
++		ext4_show_inode_blocks(next_inode);
++		iput(next_inode);
++	}
++
++skip:
++	/* show newer snapshot inode blocks, if exist */
++	if (link.new_ino) {
++		/* get compare inode */
++		next_inode = ext4_iget(inode->i_sb, link.new_ino);
++		if (IS_ERR(next_inode)) {
++			/* compare inode does not exist */
++			goto out;
++		}
++		printk(KERN_ERR "----- new snapshot ----------------------"
++				"-------------\n");
++		ext4_show_inode_blocks(next_inode);
++		iput(next_inode);
++	}
++
++out:
++	printk(KERN_ERR "================================================="
++			"==========\n");
++	return;
++}
++EXPORT_SYMBOL(ext4_show_snapshot_blocks);
++#endif
++#endif
+diff -urN -x .svn linux-stage.org/fs/ext4/super.c linux-stage/fs/ext4/super.c
+--- linux-stage.org/fs/ext4/super.c	2018-08-31 20:53:57.000000000 +0900
++++ linux-stage/fs/ext4/super.c	2018-10-24 14:05:04.000000000 +0900
+@@ -48,9 +48,9 @@
+ 
+ #include "ext4.h"
+ #include "ext4_jbd2.h"
+-#include "xattr.h"
+ #include "acl.h"
+ #include "mballoc.h"
++#include "snapshot.h"
+ 
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/ext4.h>
+@@ -2268,12 +2268,27 @@
+ 		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
+ 		vfs_dq_init(inode);
+ 		if (inode->i_nlink) {
++			int err;
++			struct ext4_snapshot_gen_lock *lock = NULL;
++
+ 			ext4_msg(sb, KERN_DEBUG,
+ 				"%s: truncating inode %lu to %lld bytes",
+ 				__func__, inode->i_ino, inode->i_size);
+ 			jbd_debug(2, "truncating inode %lu to %lld bytes\n",
+ 				  inode->i_ino, inode->i_size);
+-			ext4_truncate(inode);
++
++			err = ext4_snapshot_orphan_truncate(inode, &lock);
++			if (err) {
++				CERROR("fail to truncate orphan inode "
++				       "OST=%s ost_inode=%lu err=%d\n",
++				       EXT4_SB(inode->i_sb)->s_es->s_volume_name,
++				       inode->i_ino, err);
++				SNAPSHOT_CONSOLE_ERR(err);
++			} else
++				ext4_truncate(inode);
++
++			if (lock)
++				ext4_snapshot_unlock(lock);
+ 			nr_truncates++;
+ 		} else {
+ 			ext4_msg(sb, KERN_DEBUG,
+@@ -5173,6 +5188,10 @@
+ 	err = init_inodecache();
+ 	if (err)
+ 		goto out1;
++
++	/* initialize snapshot function */
++	ext4_snapshot_init();
++
+ 	err = register_filesystem(&ext4_fs_type);
+ 	if (err)
+ 		goto out;
+@@ -5197,6 +5216,7 @@
+ {
+ 	ext4_destroy_lazyinit_thread();
+ 	unregister_filesystem(&ext4_fs_type);
++	ext4_snapshot_exit();
+ 	destroy_inodecache();
+ 	exit_ext4_xattr();
+ 	exit_ext4_mballoc();
diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.5.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.5.series
index fbbac67..9bbf181 100644
--- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.5.series
+++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.5.series
@@ -48,3 +48,4 @@ rhel6.3/ext4-max-dir-size.patch
 rhel6.4/ext4-max-dir-size-options.patch
 rhel6.3/ext4-not-discard-preallocation-umount.patch
 rhel6.3/ext4-journal-path-opt.patch
+rhel6.5/dl_snapshot.patch
diff --git a/lustre/doc/lctl.8 b/lustre/doc/lctl.8
index dc71f4a..801e963 100644
--- a/lustre/doc/lctl.8
+++ b/lustre/doc/lctl.8
@@ -381,6 +381,17 @@ Stop LFSCK on all devices.
 .TP
   -h, --help
 Show this help.
+.br
+.PP
+.SS Snapshot Operations
+.TP
+.B snapshot <on | status> <fsname>
+The command controls the snapshot feature. The on argument enables the snapshot feature. The status argument shows whether the snapshot feature is enabled or not.
+Root privileges are needed to execute this command. This command must be run on the MDS node which manages the MDT0 device.
+.TP
+.B snapshot_get_orphan < --list | --fid <ost fid> | --delete [-f] <ost fid> > <OST name>
+The command controls orphan object files at OSTs. The --list option shows orphan object files at the specified OST. The --fid option shows object files which refer to the specified <ost fid> object file. The --delete option deletes the specified <ost fid> object file. If the -f option is specified, do not prompt before deleting.
+Root privileges are needed to execute this command. This command must be run on the OSS node.
 .SS Debug
 .TP 
 .BI debug_daemon 
diff --git a/lustre/doc/lfs.1 b/lustre/doc/lfs.1
index 1d5d9fb..9880fa2 100644
--- a/lustre/doc/lfs.1
+++ b/lustre/doc/lfs.1
@@ -84,6 +84,12 @@ lfs \- Lustre utility to create a file with specific striping pattern, find the
 .br
 .B lfs data_version [-n] \fB<filename>\fR
 .br
+.B lfs snapshot --create [-s <snapname>] [-d <directory>]
+.br
+.B lfs snapshot --delete -s <snapname> [-d <directory>] [-f] [-I]
+.br
+.B lfs snapshot --list [-d <directory>] [-R]
+.br
 .B lfs help
 .SH DESCRIPTION
 .B lfs
@@ -284,6 +290,31 @@ MDT0000. This is restricted to avoid creating directory trees that have
 intermediate path components on a series different MDTs and become unavailable
 if any of the intermediate MDTs are offline.
 .TP
+.B snapshot --create [-s <snapname>] [-d <directory>]
+To create a snapshot for the
+.IR directory
+with the
+.IR snapname .
+.TP
+.B snapshot --delete -s <snapname> [-d <directory>] [-f] [-I]
+To delete a snapshot named
+.IR snapname
+from the
+.IR directory .
+If the
+.B -f
+option is specified, do not prompt before deleting.
+If the
+.B -I
+option is specified, ignore restriction by CoW size.
+.TP
+.B snapshot --list [-d <directory>] [-R]
+To list snapshot information for the
+.IR directory .
+If the
+.B -R
+option is specified, also list the snapshots of its subdirectories recursively.
+.TP
 .B help 
 Provides brief help on the various arguments
 .TP
diff --git a/lustre/include/Makefile.am b/lustre/include/Makefile.am
index 093b587..302a0c9 100644
--- a/lustre/include/Makefile.am
+++ b/lustre/include/Makefile.am
@@ -90,4 +90,5 @@ EXTRA_DIST = \
 	obd_class.h \
 	obd.h \
 	obd_support.h \
-	obd_target.h
+	obd_target.h \
+	lustre_snapshot.h
diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h
index a39c461..676217c 100644
--- a/lustre/include/dt_object.h
+++ b/lustre/include/dt_object.h
@@ -173,6 +173,16 @@ struct dt_device_operations {
                                    struct dt_device *dev,
                                    int mode, unsigned long timeout,
                                    __u32 alg, struct lustre_capa_key *keys);
+
+	/**
+	 * snapshot: test whether it is enabled, enable it, list orphans.
+	 */
+	int   (*dt_snapshot_get_enable)(const struct lu_env *env,
+					struct dt_device *dev);
+	int   (*dt_snapshot_set_enable)(const struct lu_env *env,
+					struct dt_device *dev);
+	int   (*dt_snapshot_list_orphan)(struct dt_device *dev,
+					 void *buf);
 };
 
 struct dt_index_features {
@@ -476,6 +486,37 @@ struct dt_object_operations {
 	int (*do_object_unlock)(const struct lu_env *env, struct dt_object *dt,
 				struct ldlm_enqueue_info *einfo,
 				union ldlm_policy_data *policy);
+
+	/**
+	 * snapshot
+	 */
+	int (*do_lod_snapshot_clone)(const struct lu_env *env,
+				struct dt_object *snap_dt,
+				struct lu_attr *attr,
+				struct lu_buf *buff);
+	int (*do_osp_snapshot_clone)(const struct lu_env *env,
+				struct dt_object *snap_dt,
+				struct lu_attr *attr,
+				const struct lu_fid *orig_fid,
+				const struct lu_fid *mdt_fid);
+	int (*do_osd_declare_snapshot_clone)(const struct lu_env *env,
+				struct dt_object *snap_dt,
+				struct dt_object *orig_dt,
+				struct thandle *thandle,
+				int ignore_flag);
+	int (*do_osd_snapshot_clone)(struct dt_object *snap_dt,
+				struct dt_object *orig_dt);
+	int (*do_osd_snapshot_get_old_list)(struct dt_object *snap_dt,
+				void *list_buf);
+	int (*do_osd_snapshot_lock)(struct dt_object *dt, bool create,
+				void  **lock);
+	void (*do_osd_snapshot_unlock)(void  *lock);
+	__u32 (*do_osd_snapshot_get_info)(struct dt_object *dt);
+	int (*do_osd_snapshot_set_del_flag)(struct dt_object *dt);
+	int (*do_osd_snapshot_get_orphan)(struct dt_object *dt,
+				void *fid_buf,
+				int *array_num);
+	int (*do_osd_snapshot_destroy)(struct dt_object *dt, void *orig_fid);
 };
 
 /**
@@ -1539,6 +1580,121 @@ static inline int dt_lookup(const struct lu_env *env,
         return ret;
 }
 
+static inline int dt_snapshot_get_enable(const struct lu_env *env,
+					 struct dt_device *dt)
+{
+	LASSERT(dt);
+	LASSERT(dt->dd_ops);
+	LASSERT(dt->dd_ops->dt_snapshot_get_enable);
+	return dt->dd_ops->dt_snapshot_get_enable(env, dt);
+}
+
+static inline int dt_snapshot_set_enable(const struct lu_env *env,
+					 struct dt_device *dt)
+{
+	LASSERT(dt);
+	LASSERT(dt->dd_ops);
+	LASSERT(dt->dd_ops->dt_snapshot_set_enable);
+	return dt->dd_ops->dt_snapshot_set_enable(env, dt);
+}
+
+static inline int dt_snapshot_list_orphan(struct dt_device *dt,
+					  void *buf)
+{
+	LASSERT(dt);
+	LASSERT(dt->dd_ops);
+	LASSERT(dt->dd_ops->dt_snapshot_list_orphan);
+	return dt->dd_ops->dt_snapshot_list_orphan(dt, buf);
+}
+
+static inline int dt_snapshot_lock(struct dt_object *dt,
+					bool create, void **lock)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_osd_snapshot_lock);
+	return dt->do_ops->do_osd_snapshot_lock(dt, create, lock);
+}
+
+static inline void dt_snapshot_unlock(struct dt_object *dt, void *lock)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_osd_snapshot_unlock);
+	dt->do_ops->do_osd_snapshot_unlock(lock);
+}
+
+static inline int dt_snapshot_get_orphan(struct dt_object *dt,
+					 void *fid_buf,
+					 int *array_num)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_osd_snapshot_get_orphan);
+	return dt->do_ops->do_osd_snapshot_get_orphan(dt,
+						      fid_buf,
+						      array_num);
+}
+
+static inline int dt_snapshot_destroy(struct dt_object *dt, void *orig_fid)
+{
+	LASSERT(dt);
+	LASSERT(dt->do_ops);
+	LASSERT(dt->do_ops->do_osd_snapshot_destroy);
+	return dt->do_ops->do_osd_snapshot_destroy(dt, orig_fid);
+}
+
+static inline int dt_osp_snapshot_clone(const struct lu_env *env,
+					struct dt_object *snap_dt,
+					struct lu_attr *attr,
+					const struct lu_fid *orig_fid,
+					const struct lu_fid *mdt_fid)
+{
+	LASSERT(snap_dt);
+	LASSERT(snap_dt->do_ops);
+	LASSERT(snap_dt->do_ops->do_osp_snapshot_clone);
+	return snap_dt->do_ops->do_osp_snapshot_clone(env, snap_dt,
+						      attr,
+						      orig_fid,
+						      mdt_fid);
+}
+
+static inline int dt_osd_declare_snapshot_clone(const struct lu_env *env,
+						struct dt_object *snap_dt,
+						struct dt_object *orig_dt,
+						struct thandle *thandle,
+						int ignore_flag)
+{
+	LASSERT(snap_dt);
+	LASSERT(snap_dt->do_ops);
+	LASSERT(snap_dt->do_ops->do_osd_declare_snapshot_clone);
+	return snap_dt->do_ops->do_osd_declare_snapshot_clone(env,
+							      snap_dt,
+							      orig_dt,
+							      thandle,
+							      ignore_flag);
+}
+
+static inline int dt_osd_snapshot_clone(struct dt_object *snap_dt,
+					struct dt_object *orig_dt)
+{
+	LASSERT(snap_dt);
+	LASSERT(snap_dt->do_ops);
+	LASSERT(snap_dt->do_ops->do_osd_snapshot_clone);
+	return snap_dt->do_ops->do_osd_snapshot_clone(snap_dt,
+						      orig_dt);
+}
+
+static inline int dt_osd_snapshot_get_old_list(struct dt_object *snap_dt,
+					       void *list_buf)
+{
+	LASSERT(snap_dt);
+	LASSERT(snap_dt->do_ops);
+	LASSERT(snap_dt->do_ops->do_osd_snapshot_get_old_list);
+	return snap_dt->do_ops->do_osd_snapshot_get_old_list(snap_dt, list_buf);
+}
+
+
 #define LU221_BAD_TIME (0x80000000U + 24 * 3600)
 
 struct dt_find_hint {
diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h
index 4b6fddf..cfa7e59 100644
--- a/lustre/include/linux/lustre_compat25.h
+++ b/lustre/include/linux/lustre_compat25.h
@@ -133,6 +133,14 @@ static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
 # define inode_dio_done(i)		up_read(&(i)->i_alloc_sem)
 #endif
 
+#ifdef HAVE_IOP_ATOMIC_OPEN	/* ->lookup() is passed LOOKUP_PARENT here */
+#define ll_iop_lookup(parent, dentry)	\
+	((parent)->i_op->lookup((parent), (dentry), LOOKUP_PARENT))
+#else				/* otherwise ->lookup() is passed NULL */
+#define ll_iop_lookup(parent, dentry)	\
+	((parent)->i_op->lookup((parent), (dentry), NULL))
+#endif /* HAVE_IOP_ATOMIC_OPEN */
+
 #ifndef FS_HAS_FIEMAP
 #define FS_HAS_FIEMAP			(0)
 #endif
diff --git a/lustre/include/lu_object.h b/lustre/include/lu_object.h
index d3b234d..68cf69b 100644
--- a/lustre/include/lu_object.h
+++ b/lustre/include/lu_object.h
@@ -897,7 +897,8 @@ struct lu_rdpg {
 
 enum lu_xattr_flags {
 	LU_XATTR_REPLACE = (1 << 0),
-	LU_XATTR_CREATE  = (1 << 1)
+	LU_XATTR_CREATE  = (1 << 1),
+	LU_XATTR_SNAPSHOT = (1 << 15)
 };
 
 /** @} helpers */
diff --git a/lustre/include/lu_target.h b/lustre/include/lu_target.h
index d01a49e..ac8a772 100644
--- a/lustre/include/lu_target.h
+++ b/lustre/include/lu_target.h
@@ -121,6 +121,9 @@ struct tgt_session_info {
 	bool			 tsi_preprocessed;
 	/* request JobID */
 	char                    *tsi_jobid;
+
+	/* disable READONLY control in snapshot */
+	int			tsi_snapshot;
 };
 
 static inline struct tgt_session_info *tgt_ses_info(const struct lu_env *env)
@@ -165,6 +168,60 @@ static inline void tgt_opdata_clear(const struct lu_env *env, __u64 flags)
 }
 
 /*
+ *  tgt_snapshot_set()
+ *
+ *  set snapshot progress (not readonly for snapshot files)
+ *
+ *  \param[in]  env     lu environment
+ */
+static inline void tgt_snapshot_set(const struct lu_env *env)
+{
+	struct tgt_session_info	*tsi;
+
+	LASSERT(env->le_ses);
+	tsi = tgt_ses_info(env);
+	tsi->tsi_snapshot = 1;
+}
+
+/*
+ *  tgt_snapshot_clear()
+ *
+ *  clear snapshot progress (readonly for snapshot files)
+ *
+ *  \param[in]  env     lu environment
+ */
+static inline void tgt_snapshot_clear(const struct lu_env *env)
+{
+	struct tgt_session_info	*tsi;
+
+	LASSERT(env->le_ses);
+	tsi = tgt_ses_info(env);
+	tsi->tsi_snapshot = 0;
+}
+
+/*
+ *  tgt_snapshot()
+ *
+ *  check snapshot readonly progress
+ *
+ *  \param[in]  env     lu environment
+ *
+ *  \retval	0	not snapshot process (readonly for snapshot files)
+ *  \retval	1	snapshot process (not readonly for snapshot files)
+ */
+static inline int tgt_snapshot(const struct lu_env *env)
+{
+	const struct tgt_session_info	*info;
+
+	/* the flag is kept in the target session info of this env */
+	LASSERT(env->le_ses);
+	info = tgt_ses_info(env);
+
+	/* hand back the stored flag value unchanged */
+	return info->tsi_snapshot;
+}
+
+/*
  * Generic unified target support.
  */
 enum tgt_handler_flags {
diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h
index aa2de2b..fb8dd66 100644
--- a/lustre/include/lustre/lustre_idl.h
+++ b/lustre/include/lustre/lustre_idl.h
@@ -143,6 +143,8 @@
 #define SEQ_CONTROLLER_PORTAL          32
 #define MGS_BULK_PORTAL                33
 
+#define MDS_SNAPSHOT_PORTAL            49
+
 /* Portal 63 is reserved for the Cray Inc DVS - nic@cray.com, roe@cray.com, n8851@cray.com */
 
 /* packet types */
@@ -1533,6 +1535,7 @@ typedef enum {
         OST_QUOTACHECK = 18,
         OST_QUOTACTL   = 19,
 	OST_QUOTA_ADJUST_QUNIT = 20, /* not used since 2.4 */
+	OST_SNAPSHOT   = 32,
         OST_LAST_OPC
 } ost_cmd_t;
 #define OST_FIRST_OPC  OST_REPLY
@@ -2184,6 +2187,7 @@ typedef enum {
 	REINT_SETXATTR = 7,
 	REINT_RMENTRY  = 8,
 	REINT_MIGRATE  = 9,
+	REINT_SNAPSHOT = 21,
         REINT_MAX
 } mds_reint_t, mdt_reint_t;
 
@@ -2274,6 +2278,11 @@ enum md_op_flags {
 #define LUSTRE_NOATIME_FL      0x00000080 /* do not update atime */
 #define LUSTRE_DIRSYNC_FL      0x00010000 /* dirsync behaviour (dir only) */
 
+/* i_flags for snapshot */
+#define LUSTRE_SNAPSHOT_SHARE_FL	0x01000000	/* data block shared */
+#define LUSTRE_SNAPSHOT_SP_FL		0x04000000	/* snapshot sp flag */
+#define LUSTRE_SNAPSHOT_FL		0x08000000	/* snapshot dir/file */
+
 #ifdef __KERNEL__
 /* Convert wire LUSTRE_*_FL to corresponding client local VFS S_* values
  * for the client inode i_flags.  The LUSTRE_*_FL are the Lustre wire
@@ -2289,7 +2298,12 @@ static inline int ll_ext_to_inode_flags(int flags)
 #if defined(S_DIRSYNC)
                 ((flags & LUSTRE_DIRSYNC_FL)   ? S_DIRSYNC   : 0) |
 #endif
-                ((flags & LUSTRE_IMMUTABLE_FL) ? S_IMMUTABLE : 0));
+		((flags & LUSTRE_IMMUTABLE_FL) ? S_IMMUTABLE : 0) |
+		((flags & LUSTRE_SNAPSHOT_SHARE_FL)
+			? LUSTRE_SNAPSHOT_SHARE_FL : 0) |
+		((flags & LUSTRE_SNAPSHOT_SP_FL)
+			? LUSTRE_SNAPSHOT_SP_FL : 0) |
+		((flags & LUSTRE_SNAPSHOT_FL) ? LUSTRE_SNAPSHOT_FL : 0));
 }
 
 static inline int ll_inode_to_ext_flags(int iflags)
@@ -2300,7 +2314,11 @@ static inline int ll_inode_to_ext_flags(int iflags)
 #if defined(S_DIRSYNC)
                 ((iflags & S_DIRSYNC)   ? LUSTRE_DIRSYNC_FL   : 0) |
 #endif
-                ((iflags & S_IMMUTABLE) ? LUSTRE_IMMUTABLE_FL : 0));
+		((iflags & S_IMMUTABLE) ? LUSTRE_IMMUTABLE_FL : 0) |
+		((iflags & LUSTRE_SNAPSHOT_SHARE_FL)
+			 ? LUSTRE_SNAPSHOT_SHARE_FL : 0) |
+		((iflags & LUSTRE_SNAPSHOT_SP_FL) ? LUSTRE_SNAPSHOT_SP_FL : 0) |
+		((iflags & LUSTRE_SNAPSHOT_FL) ? LUSTRE_SNAPSHOT_FL : 0));
 }
 #endif
 
@@ -2521,6 +2539,7 @@ enum mds_op_bias {
 	MDS_OWNEROVERRIDE	= 1 << 11,
 	MDS_HSM_RELEASE		= 1 << 12,
 	MDS_RENAME_MIGRATE	= 1 << 13,
+	MDS_SNAPSHOT		= 1 << 30,
 };
 
 /* instance of mdt_reint_rec */
@@ -2674,6 +2693,30 @@ struct mdt_rec_setxattr {
         __u32           sx_padding_11;  /* rr_padding_4 */
 };
 
+struct mdt_rec_snapshot_create {
+	__u32           sc_opcode;
+	__u32           sc_cap;
+	__u32           sc_fsuid;
+	__u32           sc_fsuid_h;
+	__u32           sc_fsgid;
+	__u32           sc_fsgid_h;
+	__u32           sc_suppgid1;
+	__u32           sc_suppgid1_h;
+	__u32           sc_suppgid2;
+	__u32           sc_suppgid2_h;
+	struct lu_fid   sc_snapdir_fid;
+	struct lu_fid   sc_snapshot_fid;
+	obd_time        sc_mtime;
+	obd_time        sc_atime;
+	obd_time        sc_ctime;
+	__u64           sc_file_owner;
+	struct lu_fid   sc_orig_fid;
+	__u32           sc_padding_6;
+	__u32           sc_mode;
+	__u32           sc_umask;
+	__u32           sc_flags;
+};
+
 /*
  * mdt_rec_reint is the template for all mdt_reint_xxx structures.
  * Do NOT change the size of various members, otherwise the value
@@ -3549,6 +3592,9 @@ struct obdo {
 #define o_dropped o_misc
 #define o_cksum   o_nlink
 #define o_grant_used o_data_version
+#define o_snapshot_orig_seq o_data_version
+#define o_snapshot_orig_oid o_uid_h
+#define o_snapshot_orig_ver o_gid_h
 
 struct lfsck_request {
 	__u32		lr_event;
diff --git a/lustre/include/lustre/lustre_user.h b/lustre/include/lustre/lustre_user.h
index 8b612a3..a79d29e 100644
--- a/lustre/include/lustre/lustre_user.h
+++ b/lustre/include/lustre/lustre_user.h
@@ -206,6 +206,40 @@ struct ost_id {
 #define DOSTID LPX64":"LPU64
 #define POSTID(oi) ostid_seq(oi), ostid_id(oi)
 
+/* snapshot request data for create */
+typedef struct {
+	int	src_fd;
+	__u32	name_len;
+	char	name[256];
+	__u32	hidden_f:2;
+	__u32	mode;
+	uid_t   uid;
+	gid_t   gid;
+	struct timespec atim;
+	struct timespec mtim;
+} snapshot_create_t;
+
+/* snapshot request data */
+struct snapshot_data {
+	__u32   subcmd;
+	union {
+		snapshot_create_t	create;
+	} req;
+};
+
+/* one entry of the snapshot list (see snapshot_list_buf) */
+struct snapshot_list_data {
+	struct lu_fid fid;
+	unsigned long ost_ino;
+};
+
+#define IOC_SNAPSHOT_LIST_MAX 256
+
+struct snapshot_list_buf {
+	struct snapshot_list_data list_data[IOC_SNAPSHOT_LIST_MAX];
+	int list_num;
+};
+
 /*
  * The ioctl naming rules:
  * LL_*     - works on the currently opened filehandle instead of parent dir
@@ -274,9 +308,18 @@ struct ost_id {
 #define IOC_MDC_GETFILESTRIPE   _IOWR(IOC_MDC_TYPE, 21, struct lov_user_md *)
 #define IOC_MDC_GETFILEINFO     _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data *)
 #define LL_IOC_MDC_GETINFO      _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data *)
+#define LL_IOC_SNAPSHOT         _IOWR('F', 33, struct snapshot_data)
 
 #define MAX_OBD_NAME 128 /* If this changes, a NEW ioctl must be added */
 
+enum {
+	LL_SNAPSHOT_CHKENABLED = 1,
+	LL_SNAPSHOT_STAT,
+	LL_SNAPSHOT_CREATE,
+	LL_SNAPSHOT_CREATE_POST,
+	LL_SNAPSHOT_UNLINK,
+};
+
 /* Define O_LOV_DELAY_CREATE to be a mask that is not useful for regular
  * files, but are unlikely to be used in practice and are not harmful if
  * used incorrectly.  O_NOCTTY and FASYNC are only meaningful for character
diff --git a/lustre/include/lustre_ioctl.h b/lustre/include/lustre_ioctl.h
index dc48ad0..be3278a 100644
--- a/lustre/include/lustre_ioctl.h
+++ b/lustre/include/lustre_ioctl.h
@@ -402,4 +402,21 @@ obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf, int max_len)
 
 #define IOC_OSC_SET_ACTIVE	_IOWR('h', 21, void *)
 
+#define OBD_IOC_SNAPSHOT        _IOR('F', 31, OBD_IOC_DATA_TYPE)
+
+enum obd_ioc_snapshot_subcmd {
+	OBD_IOC_SNAPSHOT_ON,
+	OBD_IOC_SNAPSHOT_STATUS,
+	OBD_IOC_SNAPSHOT_LOCK,
+	OBD_IOC_SNAPSHOT_UNLOCK,
+};
+
+#define OBD_IOC_SNAPSHOT_ORPHAN _IOR('F', 30, OBD_IOC_DATA_TYPE)
+
+enum obd_ioc_snapshot_get_orphan_subcmd {
+	OBD_IOC_SNAPSHOT_ORPHAN_INODE,
+	OBD_IOC_SNAPSHOT_LIST_ORPHAN,
+	OBD_IOC_SNAPSHOT_ORPHAN_DEL,
+};
+
 #endif /* LUSTRE_IOCTL_H_ */
diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h
index e10d40e..6c2a2d2 100644
--- a/lustre/include/lustre_net.h
+++ b/lustre/include/lustre_net.h
@@ -310,6 +310,12 @@
 #define MDS_SETA_NTHRS_MAX	MDS_MAX_OTHR_THREADS
 #define MDS_SETA_NTHRS_BASE	min(48, MDS_SETA_NTHRS_MAX)
 
+/* snapshot service */
+#define MDS_SNAPSHOT_THR_FACTOR	4
+#define MDS_SNAPSHOT_NTHRS_INIT	2
+#define MDS_SNAPSHOT_NTHRS_MAX	8
+#define MDS_SNAPSHOT_NTHRS_BASE	min(16, MDS_SNAPSHOT_NTHRS_MAX)
+
 /* non-affinity threads */
 #define MDS_OTHR_NTHRS_INIT	PTLRPC_NTHRS_INIT
 #define MDS_OTHR_NTHRS_MAX	MDS_MAX_OTHR_THREADS
diff --git a/lustre/include/lustre_req_layout.h b/lustre/include/lustre_req_layout.h
index fb57f19..1966171 100644
--- a/lustre/include/lustre_req_layout.h
+++ b/lustre/include/lustre_req_layout.h
@@ -183,6 +183,7 @@ extern struct req_format RQF_MDS_REINT_LINK;
 extern struct req_format RQF_MDS_REINT_RENAME;
 extern struct req_format RQF_MDS_REINT_SETATTR;
 extern struct req_format RQF_MDS_REINT_SETXATTR;
+extern struct req_format RQF_MDS_REINT_SNAPSHOT;
 extern struct req_format RQF_MDS_QUOTACHECK;
 extern struct req_format RQF_MDS_QUOTACTL;
 extern struct req_format RQF_QC_CALLBACK;
@@ -216,6 +217,7 @@ extern struct req_format RQF_OST_GET_INFO_LAST_ID;
 extern struct req_format RQF_OST_GET_INFO_LAST_FID;
 extern struct req_format RQF_OST_SET_INFO_LAST_FID;
 extern struct req_format RQF_OST_GET_INFO_FIEMAP;
+extern struct req_format RQF_OST_SNAPSHOT;
 
 /* LDLM req_format */
 extern struct req_format RQF_LDLM_ENQUEUE;
@@ -282,6 +284,7 @@ extern struct req_msg_field RMF_LAYOUT_INTENT;
 extern struct req_msg_field RMF_MDT_MD;
 extern struct req_msg_field RMF_REC_REINT;
 extern struct req_msg_field RMF_EADATA;
+extern struct req_msg_field RMF_EADATA2;
 extern struct req_msg_field RMF_EAVALS;
 extern struct req_msg_field RMF_EAVALS_LENS;
 extern struct req_msg_field RMF_ACL;
@@ -301,6 +304,7 @@ extern struct req_msg_field RMF_HSM_USER_STATE;
 extern struct req_msg_field RMF_HSM_STATE_SET;
 extern struct req_msg_field RMF_MDS_HSM_CURRENT_ACTION;
 extern struct req_msg_field RMF_MDS_HSM_REQUEST;
+extern struct req_msg_field RMF_SNAP_EANAME;
 
 /* seq-mgr fields */
 extern struct req_msg_field RMF_SEQ_OPC;
diff --git a/lustre/include/lustre_snapshot.h b/lustre/include/lustre_snapshot.h
new file mode 100644
index 0000000..8c2447c
--- /dev/null
+++ b/lustre/include/lustre_snapshot.h
@@ -0,0 +1,79 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License version 2 for more details.  A copy is
+ * included in the COPYING file that accompanied this code.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ *   Copyright(c) 2016-2017 FUJITSU LIMITED.
+ *   All rights reserved.
+ */
+#ifndef __LUSTRE_SNAPSHOT_H
+#define __LUSTRE_SNAPSHOT_H
+
+#ifdef __KERNEL__
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/list.h>
+#else /* !__KERNEL__ */
+#include <stdio.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#endif /* __KERNEL__ */
+
+/* same as ldiskfs/snapshot.h */
+
+/* snapshot flags in lustre.
+ * other snapshot flags are defined in
+ * ldiskfs/snapshot.h */
+
+#define SNAPSHOT_FLAGS_MASK \
+	(LUSTRE_SNAPSHOT_FL | LUSTRE_SNAPSHOT_SP_FL | LUSTRE_SNAPSHOT_SHARE_FL)
+#define MDT_SNAPSHOT_FILE_PATT LUSTRE_SNAPSHOT_FL
+#define MDT_SNAPSHOT_DIR_PATT (LUSTRE_SNAPSHOT_FL | LUSTRE_SNAPSHOT_SP_FL)
+#define OST_SNAPSHOT_FILE_PATT \
+	(LUSTRE_SNAPSHOT_SHARE_FL | LUSTRE_SNAPSHOT_SP_FL)
+
+/* snapshot name dir or snapshot */
+#define LUSTRE_TEST_MDT_SNAPSHOT_FILE(flags)				\
+	(((flags) & SNAPSHOT_FLAGS_MASK) == MDT_SNAPSHOT_FILE_PATT)
+
+/* snapshot hidden dir */
+#define LUSTRE_TEST_MDT_SNAPSHOT_DIR(flags)				\
+	(((flags) & SNAPSHOT_FLAGS_MASK) == MDT_SNAPSHOT_DIR_PATT)
+
+#define LUSTRE_TEST_MDT_SNAPSHOT(flags)					\
+	(((flags) & (LUSTRE_SNAPSHOT_FL | LUSTRE_SNAPSHOT_SP_FL)) != 0)
+
+/* snapshot error code in lustre.
+ * other snapshot error codes are defined in
+ * ldiskfs/snapshot.h */
+
+#define SNAPSHOT_OST_ERR_DEL	2453
+#define SNAPSHOT_OST_ERR_MSG \
+	"Snapshot %s: error destroying object "DFID": %d.\n"
+
+#endif  /* __LUSTRE_SNAPSHOT_H */
diff --git a/lustre/include/md_object.h b/lustre/include/md_object.h
index 0b44b64..f39c36c 100644
--- a/lustre/include/md_object.h
+++ b/lustre/include/md_object.h
@@ -259,6 +259,11 @@ struct md_object_operations {
 				 struct md_object *obj,
 				 struct ldlm_enqueue_info *einfo,
 				 union ldlm_policy_data *policy);
+
+	int (*moo_snapshot_clone)(const struct lu_env *env,
+				 struct md_object *snap_obj,
+				 struct md_attr *ma,
+				 struct lu_buf *buf);
 };
 
 /**
@@ -351,6 +356,11 @@ struct md_device_operations {
 
         int (*mdo_iocontrol)(const struct lu_env *env, struct md_device *m,
                              unsigned int cmd, int len, void *data);
+
+	int (*mdo_snapshot_get_enable)(const struct lu_env *env,
+				       struct md_device *m);
+	int (*mdo_snapshot_set_enable)(const struct lu_env *env,
+				       struct md_device *m);
 };
 
 enum md_upcall_event {
@@ -693,6 +703,15 @@ static inline int mo_object_unlock(const struct lu_env *env,
 	return m->mo_ops->moo_object_unlock(env, m, einfo, policy);
 }
 
+static inline int mo_snapshot_clone(const struct lu_env *env,
+				    struct md_object *snap,
+				    struct md_attr *at,
+				    struct lu_buf *buf)
+{
+	LASSERT(snap->mo_ops->moo_snapshot_clone);
+	return snap->mo_ops->moo_snapshot_clone(env, snap, at, buf);
+}
+
 static inline int mdo_lookup(const struct lu_env *env,
                              struct md_object *p,
                              const struct lu_name *lname,
diff --git a/lustre/include/obd.h b/lustre/include/obd.h
index a55bba0..00ad216 100644
--- a/lustre/include/obd.h
+++ b/lustre/include/obd.h
@@ -768,6 +768,8 @@ enum obd_cleanup_stage {
 #define KEY_CACHE_LRU_SHRINK	"cache_lru_shrink"
 #define KEY_OSP_CONNECTED	"osp_connected"
 
+#define KEY_SNAPSHOT_ENABLED	"snapshot_enabled"
+
 struct lu_context;
 
 /* /!\ must be coherent with include/linux/namei.h on patched kernel */
@@ -851,6 +853,10 @@ struct md_op_data {
 	/* File object data version for HSM release, on client */
 	__u64			op_data_version;
 	struct lustre_handle	op_lease_handle;
+
+	/* snapshot data, on client */
+	const char             *op_eaname;
+	int                     op_eanamelen;
 };
 
 #define op_stripe_offset	op_ioepoch
@@ -1000,6 +1006,20 @@ struct obd_ops {
                           char *ostname);
         void (*o_getref)(struct obd_device *obd);
         void (*o_putref)(struct obd_device *obd);
+
+	/* snapshot methods */
+	int (*o_snapshot_lock)(const struct lu_env *env,
+			       struct obd_export *exp,
+			       struct obdo *oa, bool,
+			       void **lock);
+	int (*o_snapshot_unlock)(const struct lu_env *env,
+				 struct obd_export *exp,
+				 struct obdo *oa, void *lock);
+	int (*o_snapshot_get_info)(const struct lu_env *env,
+				   struct obd_export *exp,
+				   struct obdo *oa, int *type);
+	int (*o_snapshot_cancel_lock)(struct obd_export *exp,
+				     void *val);
         /*
          * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line
          * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c.
@@ -1164,6 +1184,9 @@ struct md_ops {
 				  const struct lmv_stripe_md *,
 				  const char *name, int namelen,
 				  struct lu_fid *fid);
+
+	int (*m_snapshot)(struct obd_export *, struct md_op_data *,
+				 struct ptlrpc_request **);
 };
 
 struct lsm_operations {
diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h
index 331a9e6..b14ba57 100644
--- a/lustre/include/obd_class.h
+++ b/lustre/include/obd_class.h
@@ -1452,6 +1452,75 @@ static inline int obd_register_observer(struct obd_device *obd,
         RETURN(0);
 }
 
+/*
+ * obd_snapshot_lock()
+ *
+ * forward a snapshot lock request to the obd_ops method o_snapshot_lock
+ * of the device behind \a exp, after running EXP_CHECK_DT_OP() and
+ * EXP_COUNTER_INCREMENT() for that operation
+ *
+ *  \param[in]	env		environment
+ *  \param[in]	exp		obd export
+ *  \param[in]	oa		obdo handed to the method
+ *  \param[in]	create		for create snapshot
+ *  \param[out]	lock		snapshot lock object
+ *
+ *  \retval	0		success
+ *  \retval	less than 0	failure (-errno)
+ */
+static inline int obd_snapshot_lock(const struct lu_env *env,
+				    struct obd_export *exp, struct obdo *oa,
+				    bool create, void **lock)
+{
+	int result;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, snapshot_lock);
+	EXP_COUNTER_INCREMENT(exp, snapshot_lock);
+
+	result = OBP(exp->exp_obd, snapshot_lock)(env, exp, oa, create, lock);
+	RETURN(result);
+}
+
+/*
+ * obd_snapshot_unlock()
+ *
+ * forward a snapshot unlock request to the obd_ops method
+ * o_snapshot_unlock of the device behind \a exp
+ *
+ *  \param[in]	env		environment
+ *  \param[in]	exp		obd export
+ *  \param[in]	oa		obdo handed to the method
+ *  \param[in]	lock		snapshot lock object (passed by value)
+ *
+ *  \retval	0		success
+ *  \retval	less than 0	failure (-errno)
+ */
+static inline int obd_snapshot_unlock(const struct lu_env *env,
+				      struct obd_export *exp, struct obdo *oa,
+				      void *lock)
+{
+	int result;
+	ENTRY;
+
+	EXP_CHECK_DT_OP(exp, snapshot_unlock);
+	EXP_COUNTER_INCREMENT(exp, snapshot_unlock);
+	result = OBP(exp->exp_obd, snapshot_unlock)(env, exp, oa, lock);
+	RETURN(result);
+}
+
+/* Forward \a val to the obd_ops method o_snapshot_cancel_lock of \a exp. */
+static inline int obd_snapshot_cancel_lock(struct obd_export *exp,
+					   void *val)
+{
+	int result;
+	ENTRY;
+	EXP_CHECK_DT_OP(exp, snapshot_cancel_lock);
+	EXP_COUNTER_INCREMENT(exp, snapshot_cancel_lock);
+	result = OBP(exp->exp_obd, snapshot_cancel_lock)(exp, val);
+	RETURN(result);
+}
+
 /* metadata helpers */
 static inline int md_getstatus(struct obd_export *exp,
                                struct lu_fid *fid, struct obd_capa **pc)
@@ -1873,6 +1942,18 @@ static inline int md_get_fid_from_lsm(struct obd_export *exp,
 	RETURN(rc);
 }
 
+/* Dispatch a snapshot request to the md_ops method m_snapshot of \a exp. */
+static inline int md_snapshot(struct obd_export *exp,
+			      struct md_op_data *op_data,
+			      struct ptlrpc_request **req)
+{
+	ENTRY;
+
+	EXP_CHECK_MD_OP(exp, snapshot);
+	EXP_MD_COUNTER_INCREMENT(exp, snapshot);
+	RETURN(MDP(exp->exp_obd, snapshot)(exp, op_data, req));
+}
+
 /* OBD Metadata Support */
 
 extern int obd_init_caches(void);
diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h
index 0044fb7..fb8d9d7 100644
--- a/lustre/include/obd_support.h
+++ b/lustre/include/obd_support.h
@@ -316,6 +316,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 #define OBD_FAIL_OST_STATFS_EINPROGRESS  0x231
 #define OBD_FAIL_OST_SET_INFO_NET        0x232
 #define OBD_FAIL_OST_NODESTROY		 0x233
+#define OBD_FAIL_OST_SNAPSHOT_NET	 0x2ff
 
 #define OBD_FAIL_LDLM                    0x300
 #define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
diff --git a/lustre/llite/Makefile.in b/lustre/llite/Makefile.in
index 562b9d0..e3f49c4 100644
--- a/lustre/llite/Makefile.in
+++ b/lustre/llite/Makefile.in
@@ -6,6 +6,7 @@ lustre-objs += xattr.o xattr_cache.o remote_perm.o llite_rmtacl.o llite_capa.o
 lustre-objs += rw26.o super25.o statahead.o
 lustre-objs += ../lclient/glimpse.o ../lclient/lcommon_cl.o ../lclient/lcommon_misc.o
 lustre-objs += vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o
+lustre-objs += llite_snapshot.o
 
 llite_lloop-objs := lloop.o
 
diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c
index cea0dd3..6172b22 100644
--- a/lustre/llite/dir.c
+++ b/lustre/llite/dir.c
@@ -45,6 +45,7 @@
 #include <asm/uaccess.h>
 #include <linux/buffer_head.h>   // for wait_on_buffer
 #include <linux/pagevec.h>
+#include <linux/fdtable.h>
 
 #define DEBUG_SUBSYSTEM S_LLITE
 
@@ -1040,6 +1041,442 @@ ll_getname(const char __user *filename)
 
 #define ll_putname(filename) __putname(filename)
 
+/*
+ *  ll_snapshot_check_enabled()
+ *
+ *  query the MD export for KEY_SNAPSHOT_ENABLED; once the answer was 1 it
+ *  is remembered in ll_sb_info::ll_snapshot and no further query is sent
+ *
+ *  \param[in]	inode		snapshot parent directory inode
+ *
+ *  \retval	0		disabled
+ *  \retval	1		enabled
+ *  \retval	not 0, 1	failure
+ */
+static int ll_snapshot_check_enabled(struct inode *inode)
+{
+	struct ll_sb_info	*sbi = ll_i2sbi(inode);
+	int			enabled = 0;
+	int			vallen = sizeof(enabled);
+	int			rc;
+	ENTRY;
+
+	/* fast path: an earlier call already saw the feature enabled */
+	if (sbi->ll_snapshot == 1)
+		RETURN(1);
+
+	/* query and cache update both run under the llite snapshot mutex */
+	snapshot_llite_lock();
+
+	/* ask the metadata export */
+	rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_SNAPSHOT_ENABLED),
+			  KEY_SNAPSHOT_ENABLED, &vallen, &enabled, NULL);
+	if (rc == 0) {
+		/* write sb_info when snapshot enabled */
+		if (enabled == 1)
+			sbi->ll_snapshot = 1;
+
+		/* hand the fetched value back to the caller */
+		rc = enabled;
+	}
+
+	snapshot_llite_unlock();
+
+	RETURN(rc);
+}
+
+/*
+ * ll_snapshot_create() - create one snapshot entry, then copy source xattrs
+ *
+ *  \param[in]	file		file the ioctl was issued on (unused)
+ *  \param[in]	sbi		llite superblock info (unused)
+ *  \param[in]	inode		snapshot parent directory inode
+ *  \param[in]	snap_data	snapshot request data
+ *
+ *  \retval	0		success
+ *  \retval	not 0		failure (-EINVAL: bad or missing src_fd)
+ */
+static int ll_snapshot_create(struct file *file,
+			      struct ll_sb_info *sbi,
+			      struct inode *inode,
+			      struct snapshot_data *snap_data)
+{
+	struct obd_export	*lmv = ll_i2mdexp(inode);
+	struct file		*src_file = NULL;
+	struct inode		*src_inode = NULL;
+	struct md_op_data	*op_data;
+	struct ptlrpc_request	*req = NULL;
+	struct ss_handle	*handle;
+	struct lov_mds_md	*lmm;
+	char			*ptr, *ptr_val;
+	int			size, size_val;
+	int			i, len;
+	int			rc = 0;
+	ENTRY;
+
+	/* get file structure of the snapshot source.
+	 * NOTE(review): fcheck() takes no reference on the file; confirm that
+	 * src_fd cannot be closed concurrently or switch to fget()/fput(). */
+	if (snap_data->req.create.src_fd != -1) {
+		src_file = fcheck(snap_data->req.create.src_fd);
+		if (!src_file)
+			GOTO(err, rc = -EINVAL);
+		src_inode = src_file->f_path.dentry->d_inode;
+		if (src_inode->i_flags & LUSTRE_SNAPSHOT_SP_FL)
+			GOTO(err, rc = -EALREADY);
+	}
+
+	/* the non-hidden path below dereferences the source file and inode */
+	if (!snap_data->req.create.hidden_f && src_inode == NULL)
+		GOTO(err, rc = -EINVAL);
+
+	/* prepare md data */
+	op_data = ll_prep_md_op_data(NULL,
+				inode,
+				NULL,
+				snap_data->req.create.name,
+				snap_data->req.create.name_len,
+				snap_data->req.create.mode,
+				LUSTRE_OPC_ANY,
+				NULL);
+	if (IS_ERR(op_data))
+		GOTO(err, rc = PTR_ERR(op_data));
+
+	/* set fid3 */
+	if (src_inode != NULL)
+		op_data->op_fid3 = *ll_inode2fid(src_inode);
+
+	/* set bias: MDS_SNAPSHOT only for hidden directory (.l_snapshot) */
+	op_data->op_bias = 0;
+	if (snap_data->req.create.hidden_f == 0x1)
+		op_data->op_bias |= MDS_SNAPSHOT;
+
+	/* snapshot memory open */
+	handle = snapshot_mem_open();
+	if (handle == NULL) {
+		ll_finish_md_op_data(op_data);
+		GOTO(err, rc = -ENOMEM);
+	}
+
+	/* MEMO:
+	 *  op_fid1	snapshot directory
+	 *  op_fid2	NULL
+	 *  op_fid3	original (src file/dir)
+	 *  op_name	snapshot name
+	 *  op_namelen	snapshot name length
+	 *  op_mode	snapshot attr
+	 *  op_bias	snapshot flag
+	 */
+
+	/* set attribute */
+	op_data->op_attr.ia_valid |= ATTR_MODE;
+	op_data->op_valid |= (OBD_MD_FLMODE | OBD_MD_FLTYPE);
+
+	if (snap_data->req.create.hidden_f) {
+		/* snapshot hidden directory or snapshot name directory */
+
+		/* only snapshot hidden directory */
+		if (snap_data->req.create.hidden_f == 0x1 &&
+		    uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
+			op_data->op_fsuid = snap_data->req.create.uid;
+			op_data->op_fsgid = snap_data->req.create.gid;
+		}
+
+		op_data->op_attr.ia_mode = snap_data->req.create.mode;
+
+		op_data->op_attr.ia_atime = CFS_CURRENT_TIME;
+		op_data->op_attr.ia_mtime = CFS_CURRENT_TIME;
+		op_data->op_attr.ia_ctime = CFS_CURRENT_TIME;
+	} else {
+		/* snapshot directory or file */
+		op_data->op_attr.ia_mode = snap_data->req.create.mode;
+		op_data->op_attr.ia_atime = snap_data->req.create.atim;
+		op_data->op_attr.ia_mtime = snap_data->req.create.mtim;
+		op_data->op_attr.ia_ctime = CFS_CURRENT_TIME;
+
+		/* get lov attribute buffer */
+		rc = ll_dir_getstripe(src_inode, (void **)&lmm,
+				      &size_val, &req, 0);
+		if (rc == -ENODATA)
+			size_val = 0;	/* skip no lov */
+		else if (rc != 0)
+			GOTO(err_free_data, rc);
+
+		if (size_val > 0) {
+			/* adjust lmm_stripe_offset of file */
+			if (S_ISREG(snap_data->req.create.mode)) {
+				struct lov_ost_data_v1 *objs;
+				if (lmm->lmm_magic == LOV_MAGIC_V1)
+					objs = &((struct lov_mds_md_v1 *)lmm)
+							->lmm_objects[0];
+				else
+					objs = &((struct lov_mds_md_v3 *)lmm)
+							->lmm_objects[0];
+				lmm->lmm_layout_gen = objs->l_ost_idx;
+			}
+
+			/* alloc memory */
+			rc = snapshot_mem_realloc(handle,
+						  SNAPSHOT_MEM_VALUE, size_val);
+			if (rc)
+				GOTO(err_free_data, rc);
+
+			/* get lov attribute */
+			ptr_val = handle->value[SNAPSHOT_MEM_VALUE].lb_buf;
+			memcpy(ptr_val, lmm, size_val);
+
+			/* set lov data */
+			op_data->op_data = ptr_val;
+			op_data->op_data_size = size_val;
+		}
+
+		/* finish req */
+		ptlrpc_req_finished(req);
+		req = NULL;
+	}
+
+	/* call lmv */
+	rc = md_snapshot(lmv, op_data, &req);
+	if (rc)
+		GOTO(err_free_data, rc);
+
+	/* update original file access time */
+	if (src_inode != NULL)
+		ll_update_times(req, src_inode);
+
+	/* finish req */
+	ptlrpc_req_finished(req);
+	req = NULL;
+
+	if (snap_data->req.create.hidden_f)
+		GOTO(out, rc);
+
+	/* set extended attributes: one md_snapshot() call per copied xattr */
+
+	/* get xattr list buffer */
+	size = ll_listxattr(src_file->f_path.dentry, NULL, 0);
+	if (size < 0)
+		GOTO(err_free_data, rc = size);
+	if (size == 0)
+		GOTO(out, rc);
+
+	rc = snapshot_mem_realloc(handle, SNAPSHOT_MEM_LIST, size);
+	if (rc)
+		GOTO(err_free_data, rc);
+
+	/* get xattr list */
+	ptr = handle->value[SNAPSHOT_MEM_LIST].lb_buf;
+	size = ll_listxattr(src_file->f_path.dentry, ptr, size);
+	if (size < 0)
+		GOTO(err_free_data, rc = size);
+
+	for (i = 0; i < size; i += (len + 1), ptr += (len + 1)) {
+		len = strlen(ptr);	/* length of attribute name */
+
+		/* skip below extended attributes name */
+		if (strncmp(ptr, XATTR_TRUSTED_PREFIX,
+			sizeof(XATTR_TRUSTED_PREFIX) - 1) == 0) {
+			if (strcmp(ptr, XATTR_NAME_LMA) == 0 ||
+			    strcmp(ptr, XATTR_NAME_LMV) == 0 ||
+			    strcmp(ptr, XATTR_NAME_LINK) == 0 ||
+			    strcmp(ptr, XATTR_NAME_FID) == 0 ||
+			    strcmp(ptr, XATTR_NAME_VERSION) == 0 ||
+			    strcmp(ptr, XATTR_NAME_SOM) == 0 ||
+			    strcmp(ptr, XATTR_NAME_HSM) == 0 ||
+			    strcmp(ptr, XATTR_NAME_LFSCK_NAMESPACE) == 0) {
+				continue;
+			}
+		}
+
+		/* skip lov attribute */
+		if (strcmp(ptr, XATTR_NAME_LOV) == 0 ||
+		    strcmp(ptr, XATTR_LUSTRE_PREFIX "lov") == 0) {
+			continue;
+		}
+
+		/* get xattr value buffer */
+		size_val = ll_getxattr(src_file->f_path.dentry, ptr, NULL, 0);
+		if (size_val < 0)
+			GOTO(err_free_data, rc = size_val);
+		if (size_val == 0)
+			ptr_val = "";
+		else {
+			rc = snapshot_mem_realloc(handle, SNAPSHOT_MEM_VALUE,
+								size_val);
+			if (rc)
+				GOTO(err_free_data, rc);
+
+			/* get xattr value */
+			ptr_val = handle->value[SNAPSHOT_MEM_VALUE].lb_buf;
+			size_val = ll_getxattr(src_file->f_path.dentry, ptr,
+							ptr_val, size_val);
+			if (size_val < 0)
+				GOTO(err_free_data, rc = size_val);
+		}
+
+		/* set xattr name and data */
+		op_data->op_eaname = ptr;
+		op_data->op_eanamelen = len;
+		op_data->op_data = ptr_val;
+		op_data->op_data_size = size_val;
+
+		/* call lmv */
+		rc = md_snapshot(lmv, op_data, &req);
+		if (rc)
+			GOTO(err_free_data, rc);
+
+		/* finish req */
+		ptlrpc_req_finished(req);
+		req = NULL;
+	}
+
+out:
+	/* free md data */
+	ll_finish_md_op_data(op_data);
+
+	/* snapshot memory close */
+	snapshot_mem_close(handle);
+
+	RETURN(0);
+
+err_free_data:
+	/* free md data */
+	ll_finish_md_op_data(op_data);
+
+	/* snapshot memory close */
+	snapshot_mem_close(handle);
+
+	/* request finished */
+	if (req)
+		ptlrpc_req_finished(req);
+
+err:
+	RETURN(rc);
+}
+
+/*
+ * ll_snapshot_unlink() - delete the named regular file or directory
+ *
+ *  \param[in]	file		directory file the ioctl was issued on
+ *  \param[in]	sbi		llite superblock info (unused)
+ *  \param[in]	inode		snapshot parent directory inode
+ *  \param[in]	snap_data	snapshot request data
+ *
+ *  \retval	0		success
+ *  \retval	not 0		failure
+ */
+static int ll_snapshot_unlink(struct file *file,
+			      struct ll_sb_info *sbi,
+			      struct inode *inode,
+			      struct snapshot_data *snap_data)
+{
+	struct dentry *pdentry, *dentry = NULL;
+	struct qstr name;
+	struct kstat stat;
+	int rc = 0;
+	ENTRY;
+
+	/* get target file name; d_lookup() and d_alloc() use the hash too */
+	name.name = snap_data->req.create.name;
+	name.len = strlen(snap_data->req.create.name);
+	name.hash = full_name_hash(name.name, name.len);
+
+	/* lookup unlink target: dcache first, then ask the filesystem */
+	pdentry = file->f_path.dentry;
+	dentry = d_lookup(pdentry, &name);
+	if (dentry == NULL) {
+		struct dentry *new = d_alloc(pdentry, &name);
+
+		if (new == NULL)
+			RETURN(-ENOMEM);
+		dentry = ll_iop_lookup(inode, new);
+		if (IS_ERR(dentry)) {
+			dput(new);
+			RETURN(PTR_ERR(dentry));
+		}
+		if (dentry)
+			dput(new);
+		else
+			dentry = new;
+	}
+
+	/* negative dentry, cached or fresh: nothing to delete */
+	if (dentry->d_inode == NULL)
+		GOTO(out, rc = -ENOENT);
+
+	/* get target type */
+	rc = ll_getattr(file->f_path.mnt, dentry, &stat);
+	if (rc)
+		GOTO(out, rc);
+
+	/* delete target */
+	if (S_ISREG(stat.mode))
+		rc = ll_unlink_for_snapshot(inode, pdentry, dentry, &name);
+	else if (S_ISDIR(stat.mode))
+		rc = ll_rmdir_for_snapshot(inode, pdentry, dentry, &name);
+	else
+		rc = -EINVAL;
+out:
+	dput(dentry);
+	RETURN(rc);
+}
+
+/*
+ *  ll_snapshot()
+ *
+ *  dispatch one snapshot ioctl request according to snap_data->subcmd
+ *
+ *  \param[in]		file		file the ioctl was issued on
+ *  \param[in]		sbi		llite superblock info
+ *  \param[in]		inode		snapshot parent directory inode
+ *  \param[in]		snap_data	snapshot request data
+ *
+ *  \retval		0		success / disabled / flag not set
+ *  \retval		1		enabled / LUSTRE_SNAPSHOT_SP_FL set
+ *  \retval		-ENOTTY		unknown sub-command
+ *  \retval		other		failure of the selected handler
+ */
+static int ll_snapshot(struct file *file, struct ll_sb_info *sbi,
+		struct inode *inode, struct snapshot_data *snap_data)
+{
+	int	result = 0;
+	ENTRY;
+
+	switch (snap_data->subcmd) {
+	case LL_SNAPSHOT_CHKENABLED:
+		/* check snapshot enabled */
+		result = ll_snapshot_check_enabled(inode);
+		break;
+
+	case LL_SNAPSHOT_CREATE:
+		/* snapshot create */
+		result = ll_snapshot_create(file, sbi, inode, snap_data);
+		break;
+
+	case LL_SNAPSHOT_CREATE_POST:
+		/* snapshot create(post): memory clean, result stays 0 */
+		snapshot_mem_clean();
+		break;
+
+	case LL_SNAPSHOT_UNLINK:
+		/* snapshot delete */
+		result = ll_snapshot_unlink(file, sbi, inode, snap_data);
+		break;
+
+	case LL_SNAPSHOT_STAT:
+		/* 1 when LUSTRE_SNAPSHOT_SP_FL is set on the inode, else 0 */
+		if (inode->i_flags & LUSTRE_SNAPSHOT_SP_FL)
+			result = 1;
+		break;
+
+	default:
+		result = -ENOTTY;
+		break;
+	}
+
+	RETURN(result);
+}
+
 static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
         struct inode *inode = file->f_dentry->d_inode;
@@ -1812,6 +2249,24 @@ migrate_free:
 
 		RETURN(rc);
 	}
+	case LL_IOC_SNAPSHOT: {
+		struct snapshot_data *data;
+
+		if (!S_ISDIR(inode->i_mode))
+			RETURN(-EINVAL);
+
+		OBD_ALLOC_PTR(data);
+		if (data == NULL)
+			RETURN(-ENOMEM);
+		if (copy_from_user(data, (void *)arg, sizeof(*data))) {
+			OBD_FREE_PTR(data);
+			RETURN(-EFAULT);
+		}
+		rc = ll_snapshot(file, sbi, inode, data);
+		OBD_FREE_PTR(data);
+
+		RETURN(rc);
+	}
 	default:
 		RETURN(obd_iocontrol(cmd, sbi->ll_dt_exp, 0, NULL,
 				     (void *)arg));
diff --git a/lustre/llite/file.c b/lustre/llite/file.c
index 6582be3..ace672f 100644
--- a/lustre/llite/file.c
+++ b/lustre/llite/file.c
@@ -51,6 +51,7 @@
 #include <lustre_ioctl.h>
 
 #include "cl_object.h"
+#include <lustre_snapshot.h>
 
 static int
 ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
@@ -3346,6 +3347,26 @@ static int ll_merge_md_attr(struct inode *inode)
 	RETURN(0);
 }
 
+/* Run obd_snapshot_cancel_lock() on the lsm of a snapshot-flagged inode. */
+static int ll_snaphot_prepare_glimpse(struct inode *inode)
+{
+	struct lov_stripe_md *md;
+	int result;
+	ENTRY;
+
+	if (!LUSTRE_TEST_MDT_SNAPSHOT_FILE(inode->i_flags))
+		RETURN(0);
+
+	md = ccc_inode_lsm_get(inode);
+	if (md != NULL) {
+		result = obd_snapshot_cancel_lock(ll_i2dtexp(inode), md);
+		ccc_inode_lsm_put(inode, md);
+		RETURN(result);
+	}
+	CERROR("ino=%lu lsm is NULL\n", inode->i_ino);
+	RETURN(-ENOENT);
+}
+
 static int
 ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
 {
@@ -3377,8 +3398,10 @@ ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
 		 * restore the MDT holds the layout lock so the glimpse will
 		 * block up to the end of restore (getattr will block)
 		 */
-		if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
+		if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING)) {
+			ll_snaphot_prepare_glimpse(inode);
 			rc = ll_glimpse_size(inode);
+		}
 	}
 	RETURN(rc);
 }
diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h
index 089b39d..2faa53a 100644
--- a/lustre/llite/llite_internal.h
+++ b/lustre/llite/llite_internal.h
@@ -553,6 +553,8 @@ struct ll_sb_info {
 
 	/* root squash */
 	struct root_squash_info   ll_squash;
+
+	int                       ll_snapshot;	/* snapshot enabled status */
 };
 
 #define LL_DEFAULT_MAX_RW_CHUNK      (32 * 1024 * 1024)
@@ -744,6 +746,10 @@ int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
 struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de);
 int ll_rmdir_entry(struct inode *dir, char *name, int namelen);
 void ll_update_times(struct ptlrpc_request *request, struct inode *inode);
+int ll_rmdir_for_snapshot(struct inode *dir, struct dentry *dparent,
+			  struct dentry *dchild, struct qstr *name);
+int ll_unlink_for_snapshot(struct inode *dir, struct dentry *dparent,
+			   struct dentry *dchild, struct qstr *name);
 
 /* llite/rw.c */
 int ll_writepage(struct page *page, struct writeback_control *wbc);
@@ -1602,4 +1608,25 @@ void ll_xattr_fini(void);
 int ll_page_sync_io(const struct lu_env *env, struct cl_io *io,
 		    struct cl_page *page, enum cl_req_type crt);
 
+/* llite/llite_snapshot.c */
+enum {
+	SNAPSHOT_MEM_LIST = 0,	/* buffer id used for the xattr name list */
+	SNAPSHOT_MEM_VALUE,	/* buffer id used for xattr / LOV values */
+	SNAPSHOT_MEM_MAXNUM	/* number of buffers in one handle */
+};
+
+struct ss_handle {
+	struct lu_buf		value[SNAPSHOT_MEM_MAXNUM]; /* one per id */
+	struct list_head	list;	/* link in the idle-handle list */
+};
+
+void snapshot_llite_init(void);
+void snapshot_llite_destroy(void);
+void snapshot_llite_lock(void);
+void snapshot_llite_unlock(void);
+struct ss_handle *snapshot_mem_open(void);
+void snapshot_mem_close(struct ss_handle *ptr);
+void snapshot_mem_clean(void);
+int snapshot_mem_realloc(struct ss_handle *ptr, int id, ssize_t size);
+
 #endif /* LLITE_INTERNAL_H */
diff --git a/lustre/llite/llite_snapshot.c b/lustre/llite/llite_snapshot.c
new file mode 100644
index 0000000..f0afc8b
--- /dev/null
+++ b/lustre/llite/llite_snapshot.c
@@ -0,0 +1,250 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License version 2 for more details.  A copy is
+ * included in the COPYING file that accompanied this code.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ *   Copyright(c) 2016-2017 FUJITSU LIMITED.
+ *   All rights reserved.
+ */
+#include <lu_object.h>
+#include <obd_support.h>
+#include <lustre_lite.h>
+#include "llite_internal.h"
+
+/* snapshot memory management: idle handles (ss_head), locked by ss_lock */
+static struct list_head ss_head;
+static spinlock_t ss_lock;
+
+/* taken by snapshot_llite_lock(), for ll_snapshot_check_enabled() */
+static struct mutex ss_mutex;
+
+/*
+ *  snapshot_mem_free()
+ *
+ *  free each value buffer whose lb_len is non-zero, then the handle itself
+ *
+ *  \param[in]	ptr		memory handle
+ */
+static void snapshot_mem_free(struct ss_handle *ptr)
+{
+	int id;
+
+	for (id = 0; id < SNAPSHOT_MEM_MAXNUM; id++) {
+		struct lu_buf *buf = &ptr->value[id];
+
+		if (buf->lb_len > 0)
+			OBD_FREE(buf->lb_buf, buf->lb_len);
+	}
+
+	OBD_FREE(ptr, sizeof(*ptr));
+}
+
+/*
+ *  snapshot_llite_init()
+ *
+ *  initialize the file-scope snapshot state of llite: the mutex taken by
+ *  snapshot_llite_lock(), the handle pool (ss_head) and the spinlock
+ *  that guards the pool
+ */
+void snapshot_llite_init(void)
+{
+	/* mutex (for ll_snapshot_check_enable) */
+	mutex_init(&ss_mutex);
+
+	/* spinlock object guarding the pool */
+	spin_lock_init(&ss_lock);
+
+	/* the pool itself starts out empty */
+	INIT_LIST_HEAD(&ss_head);
+}
+
+/*
+ *  snapshot_llite_destroy()
+ *
+ *  finalize of snapshot memory management (for llite): every handle
+ *  found in the pool is freed together with its buffers; handles that
+ *  are not on the pool list at that moment are not touched
+ */
+void snapshot_llite_destroy(void)
+{
+	/* free memory, and handle */
+	snapshot_mem_clean();
+}
+
+/*
+ *  snapshot_llite_lock()
+ *
+ *  take ss_mutex; ll_snapshot_check_enabled() holds it around its
+ *  obd_get_info() query and the update of ll_sb_info::ll_snapshot
+ *
+ *  release with snapshot_llite_unlock()
+ */
+void snapshot_llite_lock(void)
+{
+	mutex_lock(&ss_mutex);
+}
+
+/*
+ *  snapshot_llite_unlock()
+ *
+ *  unlock mutex for ll_snapshot_check_enable
+ *
+ *  drops ss_mutex, i.e. undoes a preceding snapshot_llite_lock()
+ *  made by the same caller
+ */
+void snapshot_llite_unlock(void)
+{
+	mutex_unlock(&ss_mutex);
+}
+
+/*
+ *  snapshot_mem_open()
+ *
+ *  memory handle open
+ *
+ *  An idle handle is taken from the pool (ss_head) when there is one; it
+ *  comes back exactly as snapshot_mem_close() stored it, buffers included.
+ *  Otherwise a new handle is allocated and zero-filled, so it starts
+ *  without buffers.
+ *
+ *  The returned handle is on no list; give it back with
+ *  snapshot_mem_close().
+ *
+ *  \retval	not NULL	memory handle
+ *  \retval	NULL		error
+ */
+struct ss_handle *snapshot_mem_open(void)
+{
+	struct ss_handle *handle = NULL;
+
+	/* first choice: take an idle handle out of the pool */
+	spin_lock(&ss_lock);
+	if (!list_empty(&ss_head)) {
+		handle = list_entry(ss_head.next, struct ss_handle, list);
+		list_del_init(&handle->list);
+	}
+	spin_unlock(&ss_lock);
+
+	if (handle != NULL)
+		return handle;
+
+	/* pool was empty: create new table */
+	OBD_ALLOC(handle, sizeof(struct ss_handle));
+	if (handle == NULL)
+		return NULL;
+
+	/* initialize new table */
+	memset(handle, 0x0, sizeof(struct ss_handle));
+	INIT_LIST_HEAD(&handle->list);
+
+	return handle;
+}
+
+/*
+ *  snapshot_mem_close()
+ *
+ *  memory handle close
+ *
+ *  Nothing is freed here: the handle keeps its buffers and is put on the
+ *  head of the pool (ss_head), where snapshot_mem_open() can find it and
+ *  snapshot_mem_clean() eventually frees it.
+ *
+ *  \param[in]	ptr	memory handle
+ */
+void snapshot_mem_close(struct ss_handle *ptr)
+{
+	spin_lock(&ss_lock);
+
+	/* save handle at the head of the pool */
+	list_add(&ptr->list, &ss_head);
+
+	spin_unlock(&ss_lock);
+}
+
+/*
+ *  snapshot_mem_clean()
+ *
+ *  memory handle clean. an unused area is released.
+ *
+ *  Every handle parked in the pool (ss_head) is unlinked and passed to
+ *  snapshot_mem_free(); the whole walk runs with ss_lock held.
+ *
+ *  Handles that snapshot_mem_open() handed out and that were not closed
+ *  yet are not on the list, so they are left alone.  The function takes
+ *  no argument.
+ */
+void snapshot_mem_clean(void)
+{
+	struct ss_handle *handle;
+
+	spin_lock(&ss_lock);
+
+	/* pop handles off the front until the pool is empty */
+	while (!list_empty(&ss_head)) {
+		handle = list_entry(ss_head.next, struct ss_handle, list);
+		list_del(&handle->list);
+		snapshot_mem_free(handle);
+	}
+
+	spin_unlock(&ss_lock);
+}
+
+/*
+ *  snapshot_mem_realloc()
+ *
+ *  make buffer \a id hold at least \a size bytes: a large enough buffer is
+ *  recycled, otherwise it is freed (contents are not copied) and a new
+ *  one of ((size >> 12) + 1) << 12 bytes, a multiple of 4K, is allocated.
+ *
+ *  \param[in/out]	ptr	memory handle
+ *  \param[in]		id	buffer id (0 = xattr name, or 1 = xattr value)
+ *  \param[in]		size	size
+ *
+ *  \retval	0 on success, -ENOMEM when the new allocation fails
+ */
+int snapshot_mem_realloc(struct ss_handle *ptr, int id, ssize_t size)
+{
+	struct lu_buf *buf = &ptr->value[id];
+	ssize_t rounded;
+	void *mem;
+
+	/* recycle: the buffer on hand is already big enough */
+	if (!(buf->lb_len < size))
+		return 0;
+
+	/* too small: drop the old buffer, its contents are not kept */
+	if (buf->lb_len > 0) {
+		OBD_FREE(buf->lb_buf, buf->lb_len);
+		buf->lb_buf = NULL;
+		buf->lb_len = 0;
+	}
+
+	/* next multiple of 4K strictly above size */
+	rounded = ((size >> 12) + 1) << 12;
+	OBD_ALLOC(mem, rounded);
+	if (mem == NULL)
+		return -ENOMEM;
+
+	/* set handle */
+	buf->lb_buf = mem;
+	buf->lb_len = rounded;
+
+	return 0;
+}
diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c
index 7770a61..cd95251 100644
--- a/lustre/llite/namei.c
+++ b/lustre/llite/namei.c
@@ -1125,8 +1125,9 @@ static int ll_mkdir_generic(struct inode *dir, struct qstr *name,
         RETURN(err);
 }
 
-static int ll_rmdir_generic(struct inode *dir, struct dentry *dparent,
-                            struct dentry *dchild, struct qstr *name)
+static int __ll_rmdir_generic(struct inode *dir, struct dentry *dparent,
+			      struct dentry *dchild, struct qstr *name,
+			      int flag)
 {
         struct ptlrpc_request *request = NULL;
         struct md_op_data *op_data;
@@ -1147,6 +1148,10 @@ static int ll_rmdir_generic(struct inode *dir, struct dentry *dparent,
 	if (dchild != NULL && dchild->d_inode != NULL)
 		op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
 	op_data->op_fid2 = op_data->op_fid3;
+
+	if (flag == 1)
+		op_data->op_bias |= MDS_SNAPSHOT;
+
         rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
         ll_finish_md_op_data(op_data);
         if (rc == 0) {
@@ -1158,6 +1163,18 @@ static int ll_rmdir_generic(struct inode *dir, struct dentry *dparent,
         RETURN(rc);
 }
 
+static int ll_rmdir_generic(struct inode *dir, struct dentry *dparent,
+			    struct dentry *dchild, struct qstr *name)
+{
+	return __ll_rmdir_generic(dir, dparent, dchild, name, 0 /* no snap */);
+}
+
+int ll_rmdir_for_snapshot(struct inode *dir, struct dentry *dparent,
+			  struct dentry *dchild, struct qstr *name)
+{
+	return __ll_rmdir_generic(dir, dparent, dchild, name, 1 /* snap */);
+}
+
 /**
  * Remove dir entry
  **/
@@ -1267,8 +1284,9 @@ out:
  * Instead, ll_ddelete() and ll_d_iput() will update it based upon if there
  * is any lock existing. They will recycle dentries and inodes based upon locks
  * too. b=20433 */
-static int ll_unlink_generic(struct inode *dir, struct dentry *dparent,
-                             struct dentry *dchild, struct qstr *name)
+static int __ll_unlink_generic(struct inode *dir, struct dentry *dparent,
+			       struct dentry *dchild, struct qstr *name,
+			       int flag)
 {
         struct ptlrpc_request *request = NULL;
         struct md_op_data *op_data;
@@ -1293,6 +1311,10 @@ static int ll_unlink_generic(struct inode *dir, struct dentry *dparent,
 		op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
 
 	op_data->op_fid2 = op_data->op_fid3;
+
+	if (flag == 1)
+		op_data->op_bias |= MDS_SNAPSHOT;
+
 	rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
 	ll_finish_md_op_data(op_data);
 	if (rc)
@@ -1307,6 +1329,18 @@ static int ll_unlink_generic(struct inode *dir, struct dentry *dparent,
         RETURN(rc);
 }
 
+static int ll_unlink_generic(struct inode *dir, struct dentry *dparent,
+			     struct dentry *dchild, struct qstr *name)
+{
+	return __ll_unlink_generic(dir, dparent, dchild, name, 0 /* no snap */);
+}
+
+int ll_unlink_for_snapshot(struct inode *dir, struct dentry *dparent,
+			   struct dentry *dchild, struct qstr *name)
+{
+	return __ll_unlink_generic(dir, dparent, dchild, name, 1 /* snap */);
+}
+
 static int ll_rename_generic(struct inode *src, struct dentry *src_dparent,
                              struct dentry *src_dchild, struct qstr *src_name,
                              struct inode *tgt, struct dentry *tgt_dparent,
diff --git a/lustre/llite/super25.c b/lustre/llite/super25.c
index f63472b..5a6e867 100644
--- a/lustre/llite/super25.c
+++ b/lustre/llite/super25.c
@@ -202,6 +202,9 @@ static int __init init_lustre_lite(void)
 	if (rc == 0)
 		rc = ll_xattr_init();
 
+	/* snapshot init */
+	snapshot_llite_init();
+
         return rc;
 }
 
@@ -231,6 +234,9 @@ static void __exit exit_lustre_lite(void)
 	kmem_cache_destroy(ll_file_data_slab);
 	if (proc_lustre_fs_root)
 		lprocfs_remove(&proc_lustre_fs_root);
+
+	/* snapshot destroy */
+	snapshot_llite_destroy();
 }
 
 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c
index d18b856..48d72d2 100644
--- a/lustre/lmv/lmv_obd.c
+++ b/lustre/lmv/lmv_obd.c
@@ -2926,7 +2926,8 @@ static int lmv_get_info(const struct lu_env *env, struct obd_export *exp,
 		   KEY_IS(KEY_DEFAULT_EASIZE) ||
 		   KEY_IS(KEY_MAX_COOKIESIZE) ||
 		   KEY_IS(KEY_DEFAULT_COOKIESIZE) ||
-		   KEY_IS(KEY_CONN_DATA)) {
+		   KEY_IS(KEY_CONN_DATA) ||
+		   KEY_IS(KEY_SNAPSHOT_ENABLED)) {
 		rc = lmv_check_connect(obd);
 		if (rc)
 			RETURN(rc);
@@ -3626,6 +3627,62 @@ int lmv_merge_attr(struct obd_export *exp, const struct lmv_stripe_md *lsm,
 	return 0;
 }
 
+/*
+ * lmv_snapshot()
+ *
+ * route a snapshot request to one target export, in these steps:
+ * connect check, lmv_locate_mds() on op_fid1, FID allocation for
+ * op_fid2 when it is not sane, then md_snapshot() on the target that
+ * lmv_find_target() returns for op_fid2
+ *
+ * \param[in]	exp		lmv obd export
+ * \param[in]	op_data		operation data
+ * \param[out]	req		portal rpc request, handed to md_snapshot()
+ *
+ * \retval	0		success
+ * \retval	not 0		failure
+ */
+int lmv_snapshot(struct obd_export *exp, struct md_op_data *op_data,
+		 struct ptlrpc_request **req)
+{
+	struct obd_device	*obd = exp->exp_obd;
+	struct lmv_obd		*lmv = &obd->u.lmv;
+	struct lmv_tgt_desc	*target;
+	int			 result;
+	ENTRY;
+
+	result = lmv_check_connect(obd);
+	if (result != 0)
+		RETURN(result);
+
+	if (lmv->desc.ld_active_tgt_count == 0)
+		RETURN(-EIO);
+
+	/* locate mds for snapshot directory (only checked for errors) */
+	target = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
+	if (IS_ERR(target))
+		RETURN(PTR_ERR(target));
+
+	/* allocate fid for snapshot */
+	if (!fid_is_sane(&op_data->op_fid2)) {
+		result = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
+		if (result != 0)
+			RETURN(result);
+	}
+
+	/* send the request to the MDT where the object will be located */
+	target = lmv_find_target(lmv, &op_data->op_fid2);
+	if (IS_ERR(target))
+		RETURN(PTR_ERR(target));
+
+	op_data->op_mds = target->ltd_idx;
+
+	op_data->op_flags |= MF_MDC_CANCEL_FID1;
+
+	/* call mdc */
+	RETURN(md_snapshot(target->ltd_exp, op_data, req));
+}
+
 struct obd_ops lmv_obd_ops = {
         .o_owner                = THIS_MODULE,
         .o_setup                = lmv_setup,
@@ -3681,6 +3738,7 @@ struct md_ops lmv_md_ops = {
         .m_intent_getattr_async = lmv_intent_getattr_async,
 	.m_revalidate_lock      = lmv_revalidate_lock,
 	.m_get_fid_from_lsm	= lmv_get_fid_from_lsm,
+	.m_snapshot		= lmv_snapshot,
 };
 
 int __init lmv_init(void)
diff --git a/lustre/lod/lod_internal.h b/lustre/lod/lod_internal.h
index 22d248c..4716c56 100644
--- a/lustre/lod/lod_internal.h
+++ b/lustre/lod/lod_internal.h
@@ -451,6 +451,11 @@ int lod_pool_remove(struct obd_device *obd, char *poolname, char *ostname);
 int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
 			struct lu_attr *attr, const struct lu_buf *buf,
 			struct thandle *th);
+int lod_qos_prep_create_for_snapshot(const struct lu_env *env,
+				     struct lod_object *lo,
+				     struct lu_attr *attr,
+				     const struct lu_buf *buf,
+				     struct thandle *th);
 int qos_add_tgt(struct lod_device*, struct lod_tgt_desc *);
 int qos_del_tgt(struct lod_device *, struct lod_tgt_desc *);
 
@@ -465,6 +470,11 @@ int lod_object_set_pool(struct lod_object *o, char *pool);
 int lod_declare_striped_object(const struct lu_env *env, struct dt_object *dt,
 			       struct lu_attr *attr,
 			       const struct lu_buf *lovea, struct thandle *th);
+int lod_declare_striped_object_for_snapshot(const struct lu_env *env,
+					    struct dt_object *dt,
+					    struct lu_attr *attr,
+					    const struct lu_buf *lovea,
+					    struct thandle *th);
 int lod_striping_create(const struct lu_env *env, struct dt_object *dt,
 			struct lu_attr *attr, struct dt_object_format *dof,
 			struct thandle *th);
diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c
index 2ea9025..272494e 100644
--- a/lustre/lod/lod_object.c
+++ b/lustre/lod/lod_object.c
@@ -1863,7 +1863,11 @@ static int lod_declare_xattr_set(const struct lu_env *env,
 			attr->la_valid = LA_TYPE | LA_MODE;
 			attr->la_mode = S_IFREG;
 		}
-		rc = lod_declare_striped_object(env, dt, attr, buf, th);
+		if (fl & LU_XATTR_SNAPSHOT)
+			rc = lod_declare_striped_object_for_snapshot(env,
+							dt, attr, buf, th);
+		else
+			rc = lod_declare_striped_object(env, dt, attr, buf, th);
 	} else if (S_ISDIR(mode)) {
 		rc = lod_dir_declare_xattr_set(env, dt, buf, name, fl, th);
 	} else {
@@ -2802,9 +2806,10 @@ static int lod_declare_init_size(const struct lu_env *env,
 /**
  * Create declaration of striped object
  */
-int lod_declare_striped_object(const struct lu_env *env, struct dt_object *dt,
-			       struct lu_attr *attr,
-			       const struct lu_buf *lovea, struct thandle *th)
+int __lod_declare_striped_object(const struct lu_env *env, struct dt_object *dt,
+				 struct lu_attr *attr,
+				 const struct lu_buf *lovea, struct thandle *th,
+				 int is_snapshot)
 {
 	struct lod_thread_info	*info = lod_env_info(env);
 	struct dt_object	*next = dt_object_child(dt);
@@ -2821,7 +2826,11 @@ int lod_declare_striped_object(const struct lu_env *env, struct dt_object *dt,
 
 	if (!dt_object_remote(next)) {
 		/* choose OST and generate appropriate objects */
-		rc = lod_qos_prep_create(env, lo, attr, lovea, th);
+		if (is_snapshot == 1)
+			rc = lod_qos_prep_create_for_snapshot(env,
+							lo, attr, lovea, th);
+		else
+			rc = lod_qos_prep_create(env, lo, attr, lovea, th);
 		if (rc) {
 			/* failed to create striping, let's reset
 			 * config so that others don't get confused */
@@ -2861,6 +2870,22 @@ out:
 	RETURN(rc);
 }
 
+int lod_declare_striped_object(const struct lu_env *env, struct dt_object *dt,
+			       struct lu_attr *attr,
+			       const struct lu_buf *lovea, struct thandle *th)
+{
+	return __lod_declare_striped_object(env, dt, attr, lovea, th, 0);
+}
+
+int lod_declare_striped_object_for_snapshot(const struct lu_env *env,
+					    struct dt_object *dt,
+					    struct lu_attr *attr,
+					    const struct lu_buf *lovea,
+					    struct thandle *th)
+{
+	return __lod_declare_striped_object(env, dt, attr, lovea, th, 1);
+}
+
 static int lod_declare_object_create(const struct lu_env *env,
 				     struct dt_object *dt,
 				     struct lu_attr *attr,
@@ -3254,6 +3279,72 @@ out:
 	RETURN(rc);
 }
 
+/*
+ * lod_snapshot_clone()
+ *
+ * Ask every stripe of the snapshot object to clone the original OST
+ * object listed at the same position of the little-endian LOV EA in \a buf.
+ *
+ * \param[in]	env		environment
+ * \param[in]	dt		snapshot lod object
+ * \param[in]	attr		original attributes
+ * \param[in]	buf		original or snapshot lov
+ *				(see mdt_snapshot_unpack() for details)
+ *
+ * \retval	0		all stripes cloned
+ * \retval	-EFAULT		\a buf is missing or too small for the layout
+ * \retval	negative	error from striping load, FID lookup or clone
+ */
+static int lod_snapshot_clone(const struct lu_env *env,
+			      struct dt_object *dt,
+			      struct lu_attr *attr,
+			      struct lu_buf *buf)
+{
+	struct lod_object	*lod_obj = lod_dt_obj(dt);
+	struct lov_mds_md_v1	*lmm;
+	struct lov_ost_data_v1	*objs;
+	struct ost_id		ost_id;
+	struct lu_fid		orig_fid;
+	int			i;
+	int			rc;
+	ENTRY;
+
+	rc = lod_load_striping_locked(env, lod_obj);
+	if (rc)
+		RETURN(rc);
+
+	/* the magic may only be read once the fixed header is known to fit */
+	lmm = buf->lb_buf;
+	if (lmm == NULL || buf->lb_len < sizeof(*lmm) ||
+	    buf->lb_len < lov_mds_md_size(lod_obj->ldo_stripenr,
+					  le32_to_cpu(lmm->lmm_magic))) {
+		CDEBUG(D_WARNING, "invalid buf size %d\n", (int)buf->lb_len);
+		RETURN(-EFAULT);
+	}
+
+	if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_V3)
+		objs = ((struct lov_mds_md_v3 *)lmm)->lmm_objects;
+	else
+		objs = lmm->lmm_objects;
+
+	for (i = 0; i < lod_obj->ldo_stripenr; i++, objs++) {
+		ostid_le_to_cpu(&objs->l_ost_oi, &ost_id);
+		rc = ostid_to_fid(&orig_fid, &ost_id,
+				  le32_to_cpu(objs->l_ost_idx));
+		if (rc != 0)
+			break;
+		rc = dt_osp_snapshot_clone(env, lod_obj->ldo_stripe[i],
+					   attr, &orig_fid,
+					   lu_object_fid(&dt->do_lu));
+		if (rc != 0) {
+			CDEBUG(D_ERROR, "failed to OST_SNAPSHOT, "
+			       "send OST_DESTROY to rollback\n");
+			break;
+		}
+	}
+	RETURN(rc);
+}
+
 struct dt_object_operations lod_obj_ops = {
 	.do_read_lock		= lod_object_read_lock,
 	.do_write_lock		= lod_object_write_lock,
@@ -3283,6 +3374,7 @@ struct dt_object_operations lod_obj_ops = {
 	.do_object_sync		= lod_object_sync,
 	.do_object_lock		= lod_object_lock,
 	.do_object_unlock	= lod_object_unlock,
+	.do_lod_snapshot_clone  = lod_snapshot_clone,
 };
 
 static ssize_t lod_read(const struct lu_env *env, struct dt_object *dt,
diff --git a/lustre/lod/lod_qos.c b/lustre/lod/lod_qos.c
index ba70a97..aee627b 100644
--- a/lustre/lod/lod_qos.c
+++ b/lustre/lod/lod_qos.c
@@ -1188,6 +1188,69 @@ out_nolock:
 	RETURN(rc);
 }
 
+/*
+ *  lod_alloc_snapshot()
+ *
+ *  Declare one stripe object per \a orig entry, on that entry's OST index.
+ *
+ *  \param[in]	env	environment information
+ *  \param[in]	lo	lod object; ldo_stripenr and ldo_pool are read
+ *  \param[out]	stripe	receives the declared objects, one per stripe
+ *  \param[in]	th	transaction handle
+ *  \param[in]	orig	original stripe entries; only l_ost_idx is read
+ *
+ *  \retval	0		success
+ *  \retval	not 0		failure
+ */
+static int lod_alloc_snapshot(const struct lu_env *env, struct lod_object *lo,
+			      struct dt_object **stripe, struct thandle *th,
+			      struct lov_user_ost_data_v1 *orig)
+{
+	struct lod_device	*m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
+	struct dt_object	*o;
+	unsigned int		ost_idx;
+	int			i, rc;
+	struct pool_desc	*pool = NULL;
+	ENTRY;
+
+	rc = lod_qos_ost_in_use_clear(env, lo->ldo_stripenr);
+	if (rc)
+		GOTO(out, rc);
+
+	if (lo->ldo_pool)
+		pool = lod_find_pool(m, lo->ldo_pool);
+
+	/* targets come from orig[], so the pool is only held, not searched */
+	if (pool != NULL)
+		down_read(&pool_tgt_rw_sem(pool));
+
+	for (i = 0; i < lo->ldo_stripenr; i++) {
+		/*
+		 * NOTE(review): ost_idx is taken from orig[] unchecked;
+		 * confirm the callee copes with an unconfigured index.
+		 */
+		ost_idx = orig[i].l_ost_idx;
+		o = lod_qos_declare_object_on(env, m, ost_idx, th);
+		if (IS_ERR(o)) {
+			CDEBUG(D_OTHER,
+				"cannot declare new object on #%u: %d\n",
+				ost_idx, (int) PTR_ERR(o));
+			GOTO(out, rc = PTR_ERR(o));
+		}
+
+		lod_qos_ost_in_use(env, i, ost_idx);
+		stripe[i] = o;
+	}
+
+out:
+	if (pool != NULL) {
+		up_read(&pool_tgt_rw_sem(pool));
+		lod_pool_putref(pool);
+	}
+
+	RETURN(rc);
+}
+
 /* Find the max stripecount we should use */
 static __u16 lod_get_stripecnt(struct lod_device *lod, __u32 magic,
 			       __u16 stripe_count)
@@ -1252,9 +1315,10 @@ out:
 	RETURN(rc);
 }
 
-static int lod_qos_parse_config(const struct lu_env *env,
-				struct lod_object *lo,
-				const struct lu_buf *buf)
+static int __lod_qos_parse_config(const struct lu_env *env,
+				  struct lod_object *lo,
+				  const struct lu_buf *buf,
+				  int is_snapshot)
 {
 	struct lod_device     *d = lu2lod_dev(lod2lu_obj(lo)->lo_dev);
 	struct lov_user_md_v1 *v1 = NULL;
@@ -1273,10 +1337,18 @@ static int lod_qos_parse_config(const struct lu_env *env,
 	if (magic == __swab32(LOV_USER_MAGIC_V1)) {
 		lustre_swab_lov_user_md_v1(v1);
 		magic = v1->lmm_magic;
+		if (is_snapshot == 1) {
+			lustre_swab_lov_user_md_objects(v1->lmm_objects,
+							v1->lmm_stripe_count);
+		}
 	} else if (magic == __swab32(LOV_USER_MAGIC_V3)) {
 		v3 = buf->lb_buf;
 		lustre_swab_lov_user_md_v3(v3);
 		magic = v3->lmm_magic;
+		if (is_snapshot == 1) {
+			lustre_swab_lov_user_md_objects(v3->lmm_objects,
+							v3->lmm_stripe_count);
+		}
 	}
 
 	if (unlikely(magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3)) {
@@ -1360,12 +1432,26 @@ static int lod_qos_parse_config(const struct lu_env *env,
 	RETURN(0);
 }
 
+static int lod_qos_parse_config(const struct lu_env *env,
+				struct lod_object *lo,
+				const struct lu_buf *buf)
+{
+	return __lod_qos_parse_config(env, lo, buf, 0);
+}
+
+static int lod_qos_parse_config_for_snapshot(const struct lu_env *env,
+					     struct lod_object *lo,
+					     const struct lu_buf *buf)
+{
+	return __lod_qos_parse_config(env, lo, buf, 1);
+}
+
 /*
  * buf should be NULL or contain striping settings
  */
-int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
-			struct lu_attr *attr, const struct lu_buf *buf,
-			struct thandle *th)
+int __lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
+			  struct lu_attr *attr, const struct lu_buf *buf,
+			  struct thandle *th, int is_snapshot)
 {
 	struct lod_device      *d = lu2lod_dev(lod2lu_obj(lo)->lo_dev);
 	struct dt_object      **stripe;
@@ -1390,7 +1476,10 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
 	 * in case the caller is passing lovea with new striping config,
 	 * we may need to parse lovea and apply new configuration
 	 */
-	rc = lod_qos_parse_config(env, lo, buf);
+	if (is_snapshot == 1)
+		rc = lod_qos_parse_config_for_snapshot(env, lo, buf);
+	else
+		rc = lod_qos_parse_config(env, lo, buf);
 	if (rc)
 		GOTO(out, rc);
 
@@ -1420,13 +1509,31 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
 		/* XXX: support for non-0 files w/o objects */
 		CDEBUG(D_OTHER, "tgt_count %d stripenr %d\n",
 				d->lod_desc.ld_tgt_count, stripe_len);
-		if (lo->ldo_def_stripe_offset >= d->lod_desc.ld_tgt_count) {
-			rc = lod_alloc_qos(env, lo, stripe, flag, th);
-			if (rc == -EAGAIN)
-				rc = lod_alloc_rr(env, lo, stripe, flag, th);
+		if (is_snapshot == 1) {
+			struct lov_user_md_v1 *v1 = buf->lb_buf;
+			struct lov_user_md_v3 *v3 = buf->lb_buf;
+
+			if (lo->ldo_stripe_size != v1->lmm_stripe_size)
+				rc = -EINVAL;
+			else if (lo->ldo_stripenr != v1->lmm_stripe_count)
+				rc = -EINVAL;
+			else
+				rc = lod_alloc_snapshot(env, lo, stripe, th,
+					(v1->lmm_magic == LOV_USER_MAGIC) ?
+					v1->lmm_objects : v3->lmm_objects);
 		} else {
-			rc = lod_alloc_specific(env, lo, stripe, flag, th);
+			if (lo->ldo_def_stripe_offset
+					>= d->lod_desc.ld_tgt_count) {
+				rc = lod_alloc_qos(env, lo, stripe, flag, th);
+				if (rc == -EAGAIN)
+					rc = lod_alloc_rr(env,
+							lo, stripe, flag, th);
+			} else {
+				rc = lod_alloc_specific(env,
+							lo, stripe, flag, th);
+			}
 		}
+
 		lod_putref(d, &d->lod_ost_descs);
 
 		if (rc < 0) {
@@ -1464,3 +1571,18 @@ out:
 	RETURN(rc);
 }
 
+int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
+			struct lu_attr *attr, const struct lu_buf *buf,
+			struct thandle *th)
+{
+	return __lod_qos_prep_create(env, lo, attr, buf, th, 0);
+}
+
+int lod_qos_prep_create_for_snapshot(const struct lu_env *env,
+				     struct lod_object *lo,
+				     struct lu_attr *attr,
+				     const struct lu_buf *buf,
+				     struct thandle *th)
+{
+	return __lod_qos_prep_create(env, lo, attr, buf, th, 1);
+}
diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c
index 7bacb3f..469415e 100644
--- a/lustre/lov/lov_obd.c
+++ b/lustre/lov/lov_obd.c
@@ -2390,6 +2390,37 @@ out:
         RETURN(rc);
 }
 
+/* Run obd_snapshot_cancel_lock() per stripe; stop at the first failure. */
+static int lov_snapshot_cancel_lock(struct obd_export *exp, void *val)
+{
+	struct obd_device *obd = class_exp2obd(exp);
+	struct lov_obd *lov = &obd->u.lov;
+	struct lov_stripe_md *lsm = val;
+	int idx, rc = 0;
+	ENTRY;
+
+	obd_getref(obd);
+	for (idx = 0; idx < lsm->lsm_stripe_count; idx++) {
+		struct lov_oinfo *loi = lsm->lsm_oinfo[idx];
+		struct lov_tgt_desc *tgt;
+		if (lov_oinfo_is_dummy(loi))
+			continue;
+		tgt = lov->lov_tgts[loi->loi_ost_idx];
+		if (tgt == NULL) {
+			CDEBUG(D_HA, "lov idx %d NULL\n", loi->loi_ost_idx);
+			continue;
+		}
+		rc = obd_snapshot_cancel_lock(tgt->ltd_exp, &loi->loi_oi);
+		if (rc) {
+			CERROR("cancel lock failed. idx=%d\n",
+			       loi->loi_ost_idx);
+			break;
+		}
+	}
+	obd_putref(obd);
+	RETURN(rc);
+}
+
 static struct obd_ops lov_obd_ops = {
 	.o_owner		= THIS_MODULE,
 	.o_setup		= lov_setup,
@@ -2419,6 +2450,7 @@ static struct obd_ops lov_obd_ops = {
 	.o_putref		= lov_putref,
 	.o_quotactl		= lov_quotactl,
 	.o_quotacheck		= lov_quotacheck,
+	.o_snapshot_cancel_lock = lov_snapshot_cancel_lock,
 };
 
 struct kmem_cache *lov_oinfo_slab;
diff --git a/lustre/mdc/mdc_internal.h b/lustre/mdc/mdc_internal.h
index 1769f3e..d180dc5 100644
--- a/lustre/mdc/mdc_internal.h
+++ b/lustre/mdc/mdc_internal.h
@@ -69,6 +69,7 @@ void mdc_link_pack(struct ptlrpc_request *req, struct md_op_data *op_data);
 void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
                      const char *old, int oldlen, const char *new, int newlen);
 void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data);
+void mdc_snapshot_pack(struct ptlrpc_request *req, struct md_op_data *op_data);
 
 /* mdc/mdc_locks.c */
 int mdc_set_lock_data(struct obd_export *exp,
@@ -136,6 +137,8 @@ int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
 int mdc_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
                       ldlm_policy_data_t *policy, ldlm_mode_t mode,
                       ldlm_cancel_flags_t flags, void *opaque);
+int mdc_snapshot(struct obd_export *exp, struct md_op_data *op_data,
+		struct ptlrpc_request **request);
 
 static inline void mdc_set_capa_size(struct ptlrpc_request *req,
                                      const struct req_msg_field *field,
diff --git a/lustre/mdc/mdc_lib.c b/lustre/mdc/mdc_lib.c
index 490204b..ce33656 100644
--- a/lustre/mdc/mdc_lib.c
+++ b/lustre/mdc/mdc_lib.c
@@ -45,6 +45,7 @@
 #include <obd.h>
 #include <cl_object.h>
 #include <lclient.h>
+#include <lustre_snapshot.h>
 #include "mdc_internal.h"
 
 #ifndef __KERNEL__
@@ -549,3 +550,71 @@ void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
         mdc_ioepoch_pack(epoch, op_data);
 	mdc_hsm_release_pack(req, op_data);
 }
+
+/*
+ *  mdc_snapshot_pack()
+ *
+ *  Fill the REINT_SNAPSHOT record and the name/EA fields of \a req.
+ *
+ *  \param[in,out]	req		portal rpc request being filled
+ *  \param[in]		op_data		operation data (only read here)
+ */
+void mdc_snapshot_pack(struct ptlrpc_request *req,
+			struct md_op_data *op_data)
+{
+	struct mdt_rec_snapshot_create	*rec;
+	char *tmp;
+	ENTRY;
+
+	CLASSERT(sizeof(struct mdt_rec_reint) ==
+			sizeof(struct mdt_rec_snapshot_create));
+	rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
+
+	rec->sc_opcode	= REINT_SNAPSHOT;
+	rec->sc_fsuid	= from_kuid(&init_user_ns, current_fsuid());
+	rec->sc_fsgid	= from_kgid(&init_user_ns, current_fsgid());
+	rec->sc_cap	= op_data->op_cap;
+	rec->sc_file_owner =
+		((__u64)(op_data->op_fsgid) | ((__u64)op_data->op_fsuid << 32));
+	rec->sc_suppgid1 = op_data->op_suppgids[0];
+	rec->sc_suppgid2 = op_data->op_suppgids[1];
+	memcpy(&rec->sc_snapdir_fid, &op_data->op_fid1,
+		sizeof(op_data->op_fid1));
+	memcpy(&rec->sc_snapshot_fid, &op_data->op_fid2,
+		sizeof(op_data->op_fid2));
+	memcpy(&rec->sc_orig_fid, &op_data->op_fid3,
+		sizeof(op_data->op_fid3));
+	rec->sc_mode	= op_data->op_attr.ia_mode;
+	rec->sc_atime	= op_data->op_attr.ia_atime.tv_sec;
+	rec->sc_mtime	= op_data->op_attr.ia_mtime.tv_sec;
+	rec->sc_ctime	= op_data->op_attr.ia_ctime.tv_sec;
+
+	/*
+	 * The permissions of the snapshot being created must not be altered
+	 * by the default mask, so the umask is always sent as 0.
+	 */
+	rec->sc_umask	= 0;
+	rec->sc_flags	= 0;
+	/* hidden dir */
+	if (op_data->op_bias & MDS_SNAPSHOT)
+		rec->sc_flags |= MDT_SNAPSHOT_DIR_PATT;
+
+	/* set snapshot name */
+	mdc_pack_name(req, &RMF_NAME, op_data->op_name, op_data->op_namelen);
+
+	/* set extended attribute name, when one was given */
+	if (op_data->op_eanamelen) {
+		mdc_pack_name(req, &RMF_SNAP_EANAME, op_data->op_eaname,
+							op_data->op_eanamelen);
+	}
+	if (op_data->op_data) {
+		/* the same lov is copied into both EADATA and EADATA2 */
+		tmp = req_capsule_client_get(&req->rq_pill, &RMF_EADATA);
+		memcpy(tmp, op_data->op_data, op_data->op_data_size);
+
+		tmp = req_capsule_client_get(&req->rq_pill, &RMF_EADATA2);
+		memcpy(tmp, op_data->op_data, op_data->op_data_size);
+	}
+
+	EXIT;
+}
diff --git a/lustre/mdc/mdc_reint.c b/lustre/mdc/mdc_reint.c
index c9ddfa3..41f1167 100644
--- a/lustre/mdc/mdc_reint.c
+++ b/lustre/mdc/mdc_reint.c
@@ -369,6 +369,9 @@ int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
 			     obd->u.cli.cl_default_mds_cookiesize);
 	ptlrpc_request_set_replen(req);
 
+	if (op_data->op_bias & MDS_SNAPSHOT)
+		req->rq_request_portal = MDS_SNAPSHOT_PORTAL;
+
         *request = req;
 
         rc = mdc_reint(req, obd->u.cli.cl_rpc_lock, LUSTRE_IMP_FULL);
@@ -491,3 +494,118 @@ int mdc_rename(struct obd_export *exp, struct md_op_data *op_data,
 
         RETURN(rc);
 }
+
+/*
+ * mdc_snapshot()
+ *
+ * @exp		mdc obd export
+ * @op_data	operation data
+ * @request	portal rpc request; only set once the request has been sent
+ * Returns 0 on success, negative errno otherwise.
+ */
+int mdc_snapshot(struct obd_export *exp, struct md_op_data *op_data,
+		 struct ptlrpc_request **request)
+{
+	struct obd_device	*obd = exp->exp_obd;
+	struct ptlrpc_request	*req;
+	struct list_head	cancels = LIST_HEAD_INIT(cancels);
+	int			count = 0;
+	int			rc;
+	ENTRY;
+
+	/* get original ldlm list */
+	if (op_data->op_fid3.f_seq != 0) {
+		count = mdc_resource_get_unused(exp, &op_data->op_fid3,
+						&cancels, LCK_CR,
+						MDS_INODELOCK_UPDATE);
+	}
+
+	/* get snapshot directory ldlm list */
+	count += mdc_resource_get_unused(exp, &op_data->op_fid1,
+					&cancels, LCK_EX,
+					MDS_INODELOCK_UPDATE);
+	/* allocate request */
+	req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+					&RQF_MDS_REINT_SNAPSHOT);
+	if (req == NULL) {
+		ldlm_lock_list_put(&cancels, l_bl_ast, count);
+		RETURN(-ENOMEM);
+	}
+
+	/* set request: RMF_NAME */
+	req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
+					op_data->op_namelen + 1);
+
+	/* set request: RMF_SNAP_EANAME */
+	req_capsule_set_size(&req->rq_pill, &RMF_SNAP_EANAME, RCL_CLIENT,
+					op_data->op_eanamelen + 1);
+
+	/* set request: RMF_EADATA */
+	req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT,
+					op_data->op_data_size);
+
+	req_capsule_set_size(&req->rq_pill, &RMF_EADATA2, RCL_CLIENT,
+					op_data->op_data_size);
+
+	/* set request: RMF_DLM_REQ */
+	rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count);
+	if (rc) {
+		ptlrpc_request_free(req);
+		RETURN(rc);
+	}
+
+	spin_lock(&req->rq_lock);
+	req->rq_replay = req->rq_import->imp_replayable;
+	spin_unlock(&req->rq_lock);
+
+	/* set request: RMF_REC_REINT */
+	mdc_snapshot_pack(req, op_data);
+
+	/* set request: RMF_MDT_MD (reply) set orig stripe size */
+	req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
+			     op_data->op_data_size);
+
+	/* set request: length of reply */
+	ptlrpc_request_set_replen(req);
+
+	/* send request */
+	req->rq_request_portal = MDS_SNAPSHOT_PORTAL;
+	rc = mdc_reint(req, obd->u.cli.cl_rpc_lock, LUSTRE_IMP_FULL);
+	if (rc == -ERESTARTSYS)
+		rc = 0;
+
+	/*
+	 * Save the reply LOV EA in RMF_EADATA for a possible replay. That
+	 * field was sized to op_data_size, so a larger reply EA is refused.
+	 */
+	if ((rc == 0) && req->rq_replay && S_ISREG(op_data->op_attr.ia_mode)) {
+		struct mdt_body *body =
+			req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+		if (body == NULL) {
+			CERROR("Can't get mdt_body\n");
+			GOTO(out, rc = -EPROTO);
+		}
+		if (body->mbo_valid & OBD_MD_FLEASIZE) {
+			void *eadata, *lmm;
+			eadata = req_capsule_server_sized_get(&req->rq_pill,
+							      &RMF_MDT_MD,
+						       body->mbo_eadatasize);
+			if (eadata == NULL)
+				GOTO(out, rc = -EPROTO);
+			lmm = req_capsule_client_get(&req->rq_pill,
+						     &RMF_EADATA);
+			if (lmm == NULL ||
+			    body->mbo_eadatasize > op_data->op_data_size)
+				GOTO(out, rc = -EPROTO);
+			memcpy(lmm, eadata, body->mbo_eadatasize);
+		}
+	}
+out:
+	spin_lock(&req->rq_lock);
+	req->rq_replay = 0;
+	spin_unlock(&req->rq_lock);
+
+	*request = req;
+
+	RETURN(rc);
+}
diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c
index 9a38ea3..e6dca71 100644
--- a/lustre/mdc/mdc_request.c
+++ b/lustre/mdc/mdc_request.c
@@ -3215,7 +3215,8 @@ struct md_ops mdc_md_ops = {
         .m_unpack_capa      = mdc_unpack_capa,
         .m_get_remote_perm  = mdc_get_remote_perm,
         .m_intent_getattr_async = mdc_intent_getattr_async,
-        .m_revalidate_lock      = mdc_revalidate_lock
+	.m_revalidate_lock      = mdc_revalidate_lock,
+	.m_snapshot             = mdc_snapshot
 };
 
 int __init mdc_init(void)
diff --git a/lustre/mdd/mdd_device.c b/lustre/mdd/mdd_device.c
index d55f358..e305efd 100644
--- a/lustre/mdd/mdd_device.c
+++ b/lustre/mdd/mdd_device.c
@@ -1510,6 +1510,61 @@ static int mdd_iocontrol(const struct lu_env *env, struct md_device *m,
         RETURN (rc);
 }
 
+/*
+ *   mdd_snapshot_get_enable()
+ *
+ *   Query the snapshot-enabled state by forwarding to the bottom dt device.
+ *
+ *   \param[in]	env		lustre environment data
+ *   \param[in]	m		metadata device
+ *
+ *   \retval	0		snapshot is disabled
+ *   \retval	1		snapshot is enabled
+ *   \retval	not 0,1		error code
+ */
+static int mdd_snapshot_get_enable(const struct lu_env *env,
+				   struct md_device *m)
+{
+	struct dt_device *dt;
+	int result;
+	ENTRY;
+
+	dt = lu2mdd_dev(&m->md_lu_dev)->mdd_bottom;
+	LASSERT(dt);
+
+	/* the bottom device reports the state; pass its answer through */
+	result = dt_snapshot_get_enable(env, dt);
+
+	RETURN(result);
+}
+
+/*
+ *   mdd_snapshot_set_enable()
+ *
+ *   Forward a snapshot enable request to the bottom dt device.
+ *
+ *   \param[in]	env		lustre environment data
+ *   \param[in]	m		metadata device
+ *
+ *   \retval	0		success
+ *   \retval	not 0		error code
+ */
+static int mdd_snapshot_set_enable(const struct lu_env *env,
+				   struct md_device *m)
+{
+	struct dt_device *dt;
+	int result;
+	ENTRY;
+
+	dt = lu2mdd_dev(&m->md_lu_dev)->mdd_bottom;
+	LASSERT(dt);
+
+	/* the bottom device does the work; pass its result through */
+	result = dt_snapshot_set_enable(env, dt);
+
+	RETURN(result);
+}
+
 /* type constructor/destructor: mdd_type_init, mdd_type_fini */
 LU_TYPE_INIT_FINI(mdd, &mdd_thread_key);
 
@@ -1521,6 +1576,8 @@ static const struct md_device_operations mdd_ops = {
 	.mdo_llog_ctxt_get  = mdd_llog_ctxt_get,
 	.mdo_iocontrol      = mdd_iocontrol,
 	.mdo_maxeasize_get  = mdd_maxeasize_get,
+	.mdo_snapshot_get_enable = mdd_snapshot_get_enable,
+	.mdo_snapshot_set_enable = mdd_snapshot_set_enable,
 };
 
 static struct lu_device_type_operations mdd_device_type_ops = {
diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c
index 49389e0..c902471 100644
--- a/lustre/mdd/mdd_dir.c
+++ b/lustre/mdd/mdd_dir.c
@@ -46,7 +46,7 @@
 #include <obd_support.h>
 #include <lustre_mds.h>
 #include <lustre_fid.h>
-
+#include <lustre_snapshot.h>
 #include "mdd_internal.h"
 
 static const char dot[] = ".";
@@ -547,6 +547,13 @@ static int mdd_link_sanity_check(const struct lu_env *env,
         if (mdd_is_dead_obj(src_obj))
                 RETURN(-ESTALE);
 
+	 /* check readonly for snapshot */
+	if (!tgt_snapshot(env)) {
+		if (LUSTRE_TEST_MDT_SNAPSHOT(tattr->la_flags) ||
+		    LUSTRE_TEST_MDT_SNAPSHOT(cattr->la_flags))
+			RETURN(-EPERM);
+	}
+
         /* Local ops, no lookup before link, check filename length here. */
 	rc = mdd_name_check(m, lname);
 	if (rc < 0)
@@ -2379,7 +2386,10 @@ static int mdd_create(const struct lu_env *env, struct md_object *pobj,
 
 		/* update parent directory mtime/ctime */
 		*la = *attr;
-		la->la_valid = LA_CTIME | LA_MTIME;
+		if (LUSTRE_TEST_MDT_SNAPSHOT_FILE(attr->la_flags))
+			la->la_valid = LA_CTIME;
+		else
+			la->la_valid = LA_CTIME | LA_MTIME;
 		rc = mdd_update_time(env, mdd_pobj, pattr, la, handle);
 		if (rc)
 			GOTO(err_insert, rc);
@@ -2516,6 +2526,10 @@ static int mdd_rename_sanity_check(const struct lu_env *env,
 	if (rc)
 		RETURN(rc);
 
+	if (!tgt_snapshot(env) &&
+	    (LUSTRE_TEST_MDT_SNAPSHOT(cattr->la_flags)))
+		RETURN(-EPERM);
+
 	/* XXX: when get here, "tobj == NULL" means tobj must
 	 * NOT exist (neither on remote MDS, such case has been
 	 * processed in cld_rename before mdd_rename and enable
diff --git a/lustre/mdd/mdd_internal.h b/lustre/mdd/mdd_internal.h
index 9183bba..c67f7e3 100644
--- a/lustre/mdd/mdd_internal.h
+++ b/lustre/mdd/mdd_internal.h
@@ -760,4 +760,20 @@ static inline struct obd_capa *mdo_capa_get(const struct lu_env *env,
         return next->do_ops->do_capa_get(env, next, old, opc);
 }
 
+/* Hand a snapshot clone to the child dt object of \a snap_obj. */
+static inline int mdo_snapshot_clone(const struct lu_env *env,
+				     struct mdd_object *snap_obj,
+				     struct lu_attr *attr, struct lu_buf *buf)
+{
+	struct dt_object *child = mdd_object_child(snap_obj);
+
+	if (!mdd_object_exists(snap_obj)) {
+		CERROR("%s: object "DFID" not found: rc = -2\n",
+			mdd_obj_dev_name(snap_obj),
+			PFID(mdd_object_fid(snap_obj)));
+		return -ENOENT;
+	}
+	return child->do_ops->do_lod_snapshot_clone(env, child, attr, buf);
+}
+
 #endif
diff --git a/lustre/mdd/mdd_object.c b/lustre/mdd/mdd_object.c
index 9f0f8c2..d558e39 100644
--- a/lustre/mdd/mdd_object.c
+++ b/lustre/mdd/mdd_object.c
@@ -53,6 +53,7 @@
 #include <lustre_param.h>
 #include <lustre_mds.h>
 #include <lustre/lustre_idl.h>
+#include <lustre_snapshot.h>
 
 #include "mdd_internal.h"
 
@@ -427,6 +428,12 @@ static int mdd_fix_attr(const struct lu_env *env, struct mdd_object *obj,
 
 	LASSERT(oattr != NULL);
 
+	/* check readonly for snapshot */
+	if (!tgt_snapshot(env)) {
+		if (LUSTRE_TEST_MDT_SNAPSHOT_FILE(oattr->la_flags))
+			RETURN(-EPERM);
+	}
+
 	/* export destroy does not have ->le_ses, but we may want
 	 * to drop LUSTRE_SOM_FL. */
 	uc = lu_ucred_check(env);
@@ -893,6 +900,12 @@ static int mdd_xattr_sanity_check(const struct lu_env *env,
 	struct lu_ucred *uc     = lu_ucred_assert(env);
 	ENTRY;
 
+	/* check readonly for snapshot */
+	if (!tgt_snapshot(env)) {
+		if (LUSTRE_TEST_MDT_SNAPSHOT(attr->la_flags))
+			RETURN(-EPERM);
+	}
+
 	if (mdd_is_immutable(obj) || mdd_is_append(obj))
 		RETURN(-EPERM);
 
@@ -1267,6 +1280,13 @@ static int mdd_layout_swap_allowed(const struct lu_env *env,
 		RETURN(-EBADF);
 	}
 
+	/* check readonly for snapshot */
+	if (!tgt_snapshot(env)) {
+		if (LUSTRE_TEST_MDT_SNAPSHOT(attr1->la_flags) ||
+		    LUSTRE_TEST_MDT_SNAPSHOT(attr2->la_flags))
+			RETURN(-EPERM);
+	}
+
 	if ((attr1->la_uid != attr2->la_uid) ||
 	    (attr1->la_gid != attr2->la_gid))
 		RETURN(-EPERM);
@@ -1578,9 +1598,10 @@ int accmode(const struct lu_env *env, const struct lu_attr *la, int flags)
 	 * NFSD uses the MDS_OPEN_OWNEROVERRIDE flag to say that a file
 	 * owner can write to a file even if it is marked readonly to hide
 	 * its brokenness. (bug 5781) */
-	if (flags & MDS_OPEN_OWNEROVERRIDE) {
-		struct lu_ucred *uc = lu_ucred_check(env);
 
+	if ((flags & MDS_OPEN_OWNEROVERRIDE) &&
+	    !LUSTRE_TEST_MDT_SNAPSHOT(la->la_flags)) {
+		struct lu_ucred *uc = lu_ucred_check(env);
 		if ((uc == NULL) || (la->la_uid == uc->uc_fsuid))
 			return 0;
 	}
@@ -2018,6 +2039,41 @@ static int mdd_object_unlock(const struct lu_env *env,
 	return dt_object_unlock(env, mdd_object_child(mdd_obj), einfo, policy);
 }
 
+/*
+ *  mdd_snapshot_clone()
+ *
+ *  Clone a snapshot object through the layer below, under its write lock.
+ *
+ *  @env	environment
+ *  @snap_obj	snapshot object
+ *  @ma		snapshot attributes; only ma_attr is passed down
+ *  @buf	original or snapshot lov (see mdt_snapshot_unpack() for details)
+ *
+ *  Returns -ENOENT if the object does not exist, else the clone result.
+ */
+static int mdd_snapshot_clone(const struct lu_env *env,
+			      struct md_object *snap_obj,
+			      struct md_attr *ma,
+			      struct lu_buf *buf)
+{
+	struct mdd_object	*mobj = md2mdd_obj(snap_obj);
+	int			result;
+	ENTRY;
+
+	if (!mdd_object_exists(mobj)) {
+		CERROR("%s: object "DFID" not found: rc = -2\n",
+		       mdd_obj_dev_name(mobj), PFID(mdd_object_fid(mobj)));
+		RETURN(-ENOENT);
+	}
+
+	/* the clone runs with the object write-locked */
+	mdd_write_lock(env, mobj, MOR_TGT_CHILD);
+	result = mdo_snapshot_clone(env, mobj, &ma->ma_attr, buf);
+	mdd_write_unlock(env, mobj);
+
+	RETURN(result);
+}
+
 const struct md_object_operations mdd_obj_ops = {
 	.moo_permission		= mdd_permission,
 	.moo_attr_get		= mdd_attr_get,
@@ -2036,4 +2092,5 @@ const struct md_object_operations mdd_obj_ops = {
 	.moo_object_sync	= mdd_object_sync,
 	.moo_object_lock	= mdd_object_lock,
 	.moo_object_unlock	= mdd_object_unlock,
+	.moo_snapshot_clone     = mdd_snapshot_clone,
 };
diff --git a/lustre/mdd/mdd_permission.c b/lustre/mdd/mdd_permission.c
index 2471c0c..0b7d719 100644
--- a/lustre/mdd/mdd_permission.c
+++ b/lustre/mdd/mdd_permission.c
@@ -48,6 +48,7 @@
 #include <lprocfs_status.h>
 #include <lustre_mds.h>
 #include <lustre_idmap.h>
+#include <lustre_snapshot.h>
 #include "mdd_internal.h"
 
 #ifdef CONFIG_FS_POSIX_ACL
@@ -251,6 +252,11 @@ int __mdd_permission_internal(const struct lu_env *env, struct mdd_object *obj,
 	if ((uc == NULL) || (uc->uc_valid == UCRED_INIT))
 		RETURN(0);
 
+	/* check readonly for snapshot */
+	if ((mask & MAY_WRITE) && !tgt_snapshot(env)) {
+		if (LUSTRE_TEST_MDT_SNAPSHOT(la->la_flags))
+			RETURN(-EPERM);
+	}
 	/* Invalid user credit */
 	if (uc->uc_valid == UCRED_INVALID)
 		RETURN(-EACCES);
diff --git a/lustre/mdt/Makefile.in b/lustre/mdt/Makefile.in
index 2b23a67..b343036 100644
--- a/lustre/mdt/Makefile.in
+++ b/lustre/mdt/Makefile.in
@@ -7,5 +7,6 @@ mdt-objs += mdt_hsm_cdt_requests.o
 mdt-objs += mdt_hsm_cdt_client.o
 mdt-objs += mdt_hsm_cdt_agent.o
 mdt-objs += mdt_coordinator.o
+mdt-objs += mdt_snapshot.o
 
 @INCLUDE_RULES@
diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c
index 2e4e8c0..fc1116e 100644
--- a/lustre/mdt/mdt_handler.c
+++ b/lustre/mdt/mdt_handler.c
@@ -1867,7 +1867,8 @@ static int mdt_reint(struct tgt_session_info *tsi)
 		[REINT_OPEN]     = &RQF_MDS_REINT_OPEN,
 		[REINT_SETXATTR] = &RQF_MDS_REINT_SETXATTR,
 		[REINT_RMENTRY]  = &RQF_MDS_REINT_UNLINK,
-		[REINT_MIGRATE]  = &RQF_MDS_REINT_RENAME
+		[REINT_MIGRATE]  = &RQF_MDS_REINT_RENAME,
+		[REINT_SNAPSHOT] = &RQF_MDS_REINT_SNAPSHOT
 	};
 
 	ENTRY;
@@ -2822,6 +2823,9 @@ void mdt_thread_info_init(struct ptlrpc_request *req,
 
 	info->mti_spec.u.sp_ea.eadata = NULL;
 	info->mti_spec.u.sp_ea.eadatalen = 0;
+
+	info->mti_eaname.ln_name = NULL;
+	info->mti_eaname.ln_namelen = 0;
 }
 
 void mdt_thread_info_fini(struct mdt_thread_info *info)
@@ -5584,6 +5588,22 @@ int mdt_get_info(struct tgt_session_info *tsi)
 
 		rc = mdt_rpc_fid2path(info, key, valout, *vallen);
 		mdt_thread_info_fini(info);
+	} else if (KEY_IS(KEY_SNAPSHOT_ENABLED)) {
+		struct mdt_thread_info  *info = tsi2mdt_info(tsi);
+		struct md_device        *next = info->mti_mdt->mdt_child;
+		const struct lu_env     *env = info->mti_env;
+		__u32                   enabled;
+
+		/* call mdd */
+		rc = next->md_ops->mdo_snapshot_get_enable(env, next);
+		mdt_thread_info_fini(info);
+		/* never copy more than the flag itself into the reply */
+		if (rc == 0 || rc == 1) {
+			enabled = (__u32)rc;
+			memcpy(valout, &enabled,
+			       min_t(size_t, *vallen, sizeof(enabled)));
+			rc = 0;
+		}
 	} else {
 		rc = -EINVAL;
 	}
@@ -5661,6 +5681,60 @@ static int mdt_ioc_version_get(struct mdt_thread_info *mti, void *karg)
 	RETURN(rc);
 }
 
+/*
+ *  mdt_lctl_snapshot
+ *
+ *  Dispatch one OBD_IOC_SNAPSHOT sub-command.
+ *
+ *  \param[in]	env		lustre environment
+ *  \param[in]	mdt		mdt device
+ *  \param[in]	subcmd		sub command; unknown values give -EOPNOTSUPP
+ *
+ *  subcmd=OBD_IOC_SNAPSHOT_ON
+ *         OBD_IOC_SNAPSHOT_LOCK	(-EBUSY if the lock bit is already set)
+ *         OBD_IOC_SNAPSHOT_UNLOCK
+ *  \retval:	0		success
+ *  \retval:	not 0		error code
+ *
+ *  subcmd=OBD_IOC_SNAPSHOT_STATUS
+ *  \retval:	0		snapshot is disabled
+ *  \retval:	1		snapshot is enabled
+ *  \retval:	not 0,1		error code
+ */
+static int mdt_lctl_snapshot(struct lu_env *env,
+			       struct mdt_device *mdt, __u32 subcmd)
+{
+	struct md_device *child = mdt->mdt_child;
+	unsigned long *flags = &mdt->mdt_snapshot_flags;
+	int result = 0;
+	ENTRY;
+
+	switch (subcmd) {
+	case OBD_IOC_SNAPSHOT_ON:
+		/* enabling is delegated to the child md device */
+		result = child->md_ops->mdo_snapshot_set_enable(env, child);
+		break;
+	case OBD_IOC_SNAPSHOT_STATUS:
+		/* so is the status query */
+		result = child->md_ops->mdo_snapshot_get_enable(env, child);
+		break;
+	case OBD_IOC_SNAPSHOT_LOCK:
+		/* the bit was already set: report busy */
+		if (test_and_set_bit(MDT_FL_SNAPSHOT_ENABLING, flags))
+			result = -EBUSY;
+		break;
+	case OBD_IOC_SNAPSHOT_UNLOCK:
+		/* clear the bit without checking who set it */
+		clear_bit(MDT_FL_SNAPSHOT_ENABLING, flags);
+		break;
+	default:
+		result = -EOPNOTSUPP;
+		break;
+	}
+
+	RETURN(result);
+}
+
 /* ioctls on obd dev */
 static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                          void *karg, void *uarg)
@@ -5745,6 +5819,12 @@ static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
 				       &mti->mti_tmp_fid1);
 		break;
 	 }
+	case OBD_IOC_SNAPSHOT: {
+		struct obd_ioctl_data	*data = karg;
+
+		rc = mdt_lctl_snapshot(&env, mdt, data->ioc_command);
+		break;
+	}
 	default:
 		rc = -EOPNOTSUPP;
 		CERROR("%s: Not supported cmd = %d, rc = %d\n",
diff --git a/lustre/mdt/mdt_idmap.c b/lustre/mdt/mdt_idmap.c
index f49c6dd..31cba12 100644
--- a/lustre/mdt/mdt_idmap.c
+++ b/lustre/mdt/mdt_idmap.c
@@ -322,7 +322,7 @@ int mdt_fix_attr_ucred(struct mdt_thread_info *info, __u32 op)
 	if (uc == NULL)
 		return -EINVAL;
 
-        if (op != REINT_SETATTR) {
+	if ((op != REINT_SETATTR)  && (op != REINT_SNAPSHOT)) {
 		if ((attr->la_valid & LA_UID) && (attr->la_uid != -1))
 			attr->la_uid = uc->uc_fsuid;
 		/* for S_ISGID, inherit gid from his parent, such work will be
diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h
index 9017957..6ee48bf 100644
--- a/lustre/mdt/mdt_internal.h
+++ b/lustre/mdt/mdt_internal.h
@@ -157,6 +157,9 @@ struct coordinator {
 #define MDT_FL_CFGLOG 0
 #define MDT_FL_SYNCED 1
 
+/* snapshot state flag bits */
+#define MDT_FL_SNAPSHOT_ENABLING 0
+
 struct mdt_device {
 	/* super-class */
 	struct lu_device	   mdt_lu_dev;
@@ -230,6 +233,9 @@ struct mdt_device {
 	struct lu_device	  *mdt_qmt_dev;
 
 	struct coordinator	   mdt_coordinator;
+
+	/* snapshot enable flag */
+	unsigned long		   mdt_snapshot_flags;
 };
 
 #define MDT_SERVICE_WATCHDOG_FACTOR	(2)
@@ -455,6 +461,9 @@ struct mdt_thread_info {
 	/* should be enough to fit lustre_mdt_attrs */
 	char			   mti_xattr_buf[128];
 	struct ldlm_enqueue_info   mti_einfo;
+
+	/* for snapshot in request */
+	struct lu_name              mti_eaname;
 };
 
 extern struct lu_context_key mdt_thread_key;
@@ -1082,5 +1091,19 @@ static inline char *mdt_obd_name(struct mdt_device *mdt)
 int mds_mod_init(void);
 void mds_mod_exit(void);
 
+/* mdt/mdt_snapshot.c */
+enum {
+	SNAPSHOT_COUNT_MKDIR = 0,
+	SNAPSHOT_COUNT_MKNOD,
+	SNAPSHOT_COUNT_RMDIR,
+	SNAPSHOT_COUNT_UNLINK,
+	SNAPSHOT_COUNT_SETXATTR,
+	SNAPSHOT_COUNT_LAST,
+};
+
+void mdt_snapshot_counter_init(void);
+inline void mdt_snapshot_counter_incr(int op);
+inline __u64 mdt_snapshot_counter_get(int op);
+
 #endif /* __KERNEL__ */
 #endif /* _MDT_H */
diff --git a/lustre/mdt/mdt_lib.c b/lustre/mdt/mdt_lib.c
index 65ceaf3..2e436b2 100644
--- a/lustre/mdt/mdt_lib.c
+++ b/lustre/mdt/mdt_lib.c
@@ -1203,6 +1203,11 @@ static int mdt_unlink_unpack(struct mdt_thread_info *info)
         else
                 ma->ma_attr_flags &= ~MDS_VTX_BYPASS;
 
+	if (rec->ul_bias & MDS_SNAPSHOT)
+		ma->ma_attr_flags |= MDS_SNAPSHOT;
+	else
+		ma->ma_attr_flags &= ~MDS_SNAPSHOT;
+
 	info->mti_spec.no_create = !!req_is_replay(mdt_info_req(info));
 
         rc = mdt_dlmreq_unpack(info);
@@ -1279,12 +1284,10 @@ static int mdt_rename_unpack(struct mdt_thread_info *info)
 /*
  * please see comment above LOV_MAGIC_V1_DEF
  */
-static void mdt_fix_lov_magic(struct mdt_thread_info *info)
+static void mdt_fix_lov_magic(struct mdt_thread_info *info, void *lmm)
 {
-	struct mdt_reint_record *rr = &info->mti_rr;
-	struct lov_user_md_v1   *v1;
+	struct lov_user_md_v1   *v1 = lmm;
 
-	v1 = (void *)rr->rr_eadata;
 	LASSERT(v1);
 
 	if (unlikely(req_is_replay(mdt_info_req(info)))) {
@@ -1374,7 +1377,7 @@ static int mdt_open_unpack(struct mdt_thread_info *info)
                         sp->u.sp_ea.eadatalen = rr->rr_eadatalen;
                         sp->u.sp_ea.eadata = rr->rr_eadata;
                         sp->no_create = !!req_is_replay(req);
-			mdt_fix_lov_magic(info);
+			mdt_fix_lov_magic(info, (void *)rr->rr_eadata);
                 }
 
                 /*
@@ -1454,6 +1457,87 @@ static int mdt_setxattr_unpack(struct mdt_thread_info *info)
         RETURN(0);
 }
 
+/*
+ *  mdt_snapshot_unpack()
+ *
+ *  unpack a REINT_SNAPSHOT request (record, names, EA buffers) into \a info
+ *  \param[in]	info	thread information
+ *  \retval	0	success
+ *  \retval	not 0	-EFAULT (no record) or mdt_dlmreq_unpack() error
+ */
+static int mdt_snapshot_unpack(struct mdt_thread_info *info)
+{
+	struct lu_ucred			*uc = mdt_ucred(info);
+	struct mdt_reint_record		*rr = &info->mti_rr;
+	struct md_attr			*ma = &info->mti_attr;
+	struct req_capsule		*pill = info->mti_pill;
+	struct lu_attr			*attr = &ma->ma_attr;
+	struct lu_fid			*tmp_fid  = &info->mti_tmp_fid1;
+	struct mdt_rec_snapshot_create	*rec;
+	int				rc;
+	ENTRY;
+
+	CLASSERT(sizeof(struct mdt_rec_reint) ==
+				sizeof(struct mdt_rec_snapshot_create));
+	rec = req_capsule_client_get(pill, &RMF_REC_REINT);
+	if (rec == NULL) {
+		CERROR("snapshot no request data\n");
+		RETURN(-EFAULT);
+	}
+
+	/* set user cred from the client-supplied record */
+	uc->uc_fsuid = rec->sc_fsuid;
+	uc->uc_fsgid = rec->sc_fsgid;
+	uc->uc_cap   = rec->sc_cap;
+	uc->uc_umask = rec->sc_umask;
+	uc->uc_suppgids[0] = rec->sc_suppgid1;
+	uc->uc_suppgids[1] = -1;
+
+	/* set reint record (name validity is not checked here) */
+	rr->rr_fid1   = &rec->sc_snapdir_fid;
+	rr->rr_fid2   = &rec->sc_snapshot_fid;
+	mdt_name_unpack(pill, &RMF_NAME, &rr->rr_name, MNF_FIX_ANON);
+	mdt_name_unpack(pill, &RMF_SNAP_EANAME, &info->mti_eaname, 0);
+
+	/* set orig lov (EADATA2 buffer, not validated here) */
+	rr->rr_eadata = req_capsule_client_get(pill, &RMF_EADATA2);
+	rr->rr_eadatalen = req_capsule_get_size(pill, &RMF_EADATA2, RCL_CLIENT);
+
+
+	/* set attributes; sc_file_owner = uid (high 32 bits) | gid (low) */
+	attr->la_mode  = rec->sc_mode;
+	attr->la_atime = rec->sc_atime;
+	attr->la_mtime = rec->sc_mtime;
+	attr->la_ctime = rec->sc_ctime;
+	attr->la_flags = rec->sc_flags;
+	attr->la_uid   = (__u32)(rec->sc_file_owner >> 32);
+	attr->la_gid   = (__u32)(rec->sc_file_owner & 0xFFFFFFFFUll);
+	attr->la_valid = LA_MODE  | LA_UID   | LA_GID |
+			 LA_CTIME | LA_MTIME | LA_ATIME;
+
+	/* set spec */
+	memset(&info->mti_spec.u, 0, sizeof(info->mti_spec.u));
+	info->mti_spec.sp_cr_flags = 0;
+
+	/* orig mdt_md_snapshot() request: orig lov
+	 * replay mdt_md_snapshot() request: snapshot lov
+	 * orig and replay mdt_md_snapshot_xattr() request: orig xattr
+	 */
+	ma->ma_lmm = req_capsule_client_get(pill, &RMF_EADATA);
+	ma->ma_lmm_size = req_capsule_get_size(pill, &RMF_EADATA, RCL_CLIENT);
+	if (ma->ma_lmm_size  > 0 && S_ISREG(attr->la_mode))
+		mdt_fix_lov_magic(info, ma->ma_lmm);
+
+	/* set original fid (copied into mti_tmp_fid1) */
+	*tmp_fid = rec->sc_orig_fid;
+
+	/* set ldlm request */
+	rc = mdt_dlmreq_unpack(info);
+	if (rc)
+		CERROR("snapshot ldlm request error rc=%d\n", rc);
+
+	RETURN(rc);
+}
 
 typedef int (*reint_unpacker)(struct mdt_thread_info *info);
 
@@ -1467,6 +1551,7 @@ static reint_unpacker mdt_reint_unpackers[REINT_MAX] = {
 	[REINT_SETXATTR] = mdt_setxattr_unpack,
 	[REINT_RMENTRY]  = mdt_rmentry_unpack,
 	[REINT_MIGRATE]  = mdt_rename_unpack,
+	[REINT_SNAPSHOT] = mdt_snapshot_unpack,
 };
 
 int mdt_reint_unpack(struct mdt_thread_info *info, __u32 op)
diff --git a/lustre/mdt/mdt_lproc.c b/lustre/mdt/mdt_lproc.c
index ff7a260..80b327a 100644
--- a/lustre/mdt/mdt_lproc.c
+++ b/lustre/mdt/mdt_lproc.c
@@ -826,6 +826,34 @@ LPROC_SEQ_FOPS_RW_TYPE(mdt, ir_factor);
 LPROC_SEQ_FOPS_RW_TYPE(mdt, nid_stats_clear);
 LPROC_SEQ_FOPS(mdt_hsm_cdt_control);
 
+/*
+ * mdt_snapshot_stats_seq_show()
+ *
+ * show handler for /proc/fs/mdt/<fs>-MDT0000/snapshot_stats (five counters)
+ *
+ * \param[in]	m	seq_file the counters are printed into
+ * \param[in]	data	callback data (unused)
+ *
+ * \retval	the value returned by the single seq_printf() call
+ *		(NOTE(review): 0 or -1 on the RHEL6 kernel - confirm)
+ */
+static int mdt_snapshot_stats_seq_show(struct seq_file *m, void *data)
+{
+	return seq_printf(m,
+		"%-25s %llu samples [reqs]\n"
+		"%-25s %llu samples [reqs]\n"
+		"%-25s %llu samples [reqs]\n"
+		"%-25s %llu samples [reqs]\n"
+		"%-25s %llu samples [reqs]\n",
+		"mknod", mdt_snapshot_counter_get(SNAPSHOT_COUNT_MKNOD),
+		"unlink", mdt_snapshot_counter_get(SNAPSHOT_COUNT_UNLINK),
+		"mkdir", mdt_snapshot_counter_get(SNAPSHOT_COUNT_MKDIR),
+		"rmdir", mdt_snapshot_counter_get(SNAPSHOT_COUNT_RMDIR),
+		"setxattr", mdt_snapshot_counter_get(SNAPSHOT_COUNT_SETXATTR));
+}
+
+LPROC_SEQ_FOPS_RO(mdt_snapshot_stats);
+
 static struct lprocfs_seq_vars lprocfs_mdt_obd_vars[] = {
 	{ .name =	"uuid",
 	  .fops =	&mdt_uuid_fops				},
@@ -879,6 +907,8 @@ static struct lprocfs_seq_vars lprocfs_mdt_obd_vars[] = {
 	  .fops =	&mdt_enable_remote_dir_gid_fops		},
 	{ .name =	"hsm_control",
 	  .fops =	&mdt_hsm_cdt_control_fops		},
+	{ .name =	"snapshot_stats",
+	  .fops =	&mdt_snapshot_stats_fops		},
 	{ 0 }
 };
 
@@ -974,6 +1004,9 @@ int mdt_procfs_init(struct mdt_device *mdt, const char *name)
 
 	LASSERT(name != NULL);
 
+	/* initialized snapshot counter */
+	mdt_snapshot_counter_init();
+
 	obd->obd_vars = lprocfs_mdt_obd_vars;
 	rc = lprocfs_seq_obd_setup(obd);
 	if (rc) {
diff --git a/lustre/mdt/mdt_mds.c b/lustre/mdt/mdt_mds.c
index 424ddf5..ceaab97 100644
--- a/lustre/mdt/mdt_mds.c
+++ b/lustre/mdt/mdt_mds.c
@@ -64,6 +64,7 @@ struct mds_device {
 	struct ptlrpc_service     *mds_mdsc_service;
 	struct ptlrpc_service     *mds_mdss_service;
 	struct ptlrpc_service     *mds_fld_service;
+	struct ptlrpc_service     *mds_snapshot_service;
 };
 
 /*
@@ -99,6 +100,14 @@ static char *mds_attr_num_cpts;
 CFS_MODULE_PARM(mds_attr_num_cpts, "c", charp, 0444,
 		"CPU partitions MDS setattr threads should run on");
 
+static unsigned long mds_snapshot_num_threads;
+CFS_MODULE_PARM(mds_snapshot_num_threads, "ul", ulong, 0444,
+		"number of MDS snapshot service threads to start");
+
+static char *mds_snapshot_num_cpts;
+CFS_MODULE_PARM(mds_snapshot_num_cpts, "c", charp, 0444,
+		"CPU partitions MDS snapshot threads should run on");
+
 /* device init/fini methods */
 static void mds_stop_ptlrpc_service(struct mds_device *m)
 {
@@ -131,6 +140,10 @@ static void mds_stop_ptlrpc_service(struct mds_device *m)
 		ptlrpc_unregister_service(m->mds_fld_service);
 		m->mds_fld_service = NULL;
 	}
+	if (m->mds_snapshot_service != NULL) {
+		ptlrpc_unregister_service(m->mds_snapshot_service);
+		m->mds_snapshot_service = NULL;
+	}
 	EXIT;
 }
 
@@ -428,6 +441,51 @@ static int mds_start_ptlrpc_service(struct mds_device *m)
 		GOTO(err_mds_svc, rc);
 	}
 
+	/* snapshot service start */
+	memset(&conf, 0, sizeof(conf));
+	conf = (typeof(conf)) {
+		.psc_name		= LUSTRE_MDT_NAME "_snapshot",
+		.psc_watchdog_factor	= MDT_SERVICE_WATCHDOG_FACTOR,
+		.psc_buf		= {
+			.bc_nbufs		= MDS_NBUFS,
+			.bc_buf_size		= MDS_REG_BUFSIZE,
+			.bc_req_max_size	= MDS_REG_MAXREQSIZE,
+			.bc_rep_max_size	= MDS_REG_MAXREPSIZE,
+			.bc_req_portal		= MDS_SNAPSHOT_PORTAL,
+			.bc_rep_portal		= MDC_REPLY_PORTAL,
+		},
+		/*
+		 * We'd like to have a mechanism to set this on a per-device
+		 * basis, but alas...
+		 */
+		.psc_thr		= {
+			.tc_thr_name		= LUSTRE_MDT_NAME "_snap",
+			.tc_thr_factor		= MDS_SNAPSHOT_THR_FACTOR,
+			.tc_nthrs_init		= MDS_SNAPSHOT_NTHRS_INIT,
+			.tc_nthrs_base		= MDS_SNAPSHOT_NTHRS_BASE,
+			.tc_nthrs_max		= MDS_SNAPSHOT_NTHRS_MAX,
+			.tc_nthrs_user		= mds_snapshot_num_threads,
+			.tc_cpu_affinity	= 1,
+			.tc_ctx_tags		= LCT_MD_THREAD,
+		},
+		.psc_cpt		= {
+			.cc_pattern		= mds_snapshot_num_cpts,
+		},
+		.psc_ops		= {
+			.so_req_handler		= tgt_request_handle,
+			.so_req_printer		= target_print_req,
+			.so_hpreq_handler	= ptlrpc_hpreq_handler,
+		},
+	};
+	m->mds_snapshot_service = ptlrpc_register_service(&conf, procfs_entry);
+	if (IS_ERR(m->mds_snapshot_service)) {
+		rc = PTR_ERR(m->mds_snapshot_service);
+		CERROR("failed to start snapshot service: %d\n", rc);
+		m->mds_snapshot_service = NULL;
+
+		GOTO(err_mds_svc, rc);
+	}
+
 	EXIT;
 err_mds_svc:
 	if (rc)
diff --git a/lustre/mdt/mdt_recovery.c b/lustre/mdt/mdt_recovery.c
index f5a49da..9de1661 100644
--- a/lustre/mdt/mdt_recovery.c
+++ b/lustre/mdt/mdt_recovery.c
@@ -367,7 +367,8 @@ static mdt_reconstructor reconstructors[REINT_MAX] = {
         [REINT_UNLINK]   = mdt_reconstruct_generic,
         [REINT_RENAME]   = mdt_reconstruct_generic,
         [REINT_OPEN]     = mdt_reconstruct_open,
-        [REINT_SETXATTR] = mdt_reconstruct_generic
+	[REINT_SETXATTR] = mdt_reconstruct_generic,
+	[REINT_SNAPSHOT] = mdt_reconstruct_create
 };
 
 void mdt_reconstruct(struct mdt_thread_info *mti,
diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c
index 2f98026..b909647 100644
--- a/lustre/mdt/mdt_reint.c
+++ b/lustre/mdt/mdt_reint.c
@@ -48,6 +48,7 @@
 
 #include "mdt_internal.h"
 #include <lustre_lmv.h>
+#include <lustre_snapshot.h>
 
 static inline void mdt_reint_init_ma(struct mdt_thread_info *info,
                                      struct md_attr *ma)
@@ -867,6 +868,7 @@ static int mdt_reint_unlink(struct mdt_thread_info *info,
 	struct mdt_object	*s0_obj = NULL;
 	int			rc;
 	int			no_name = 0;
+	int			snapshot_flag = 0;
 	ENTRY;
 
 	DEBUG_REQ(D_INODE, req, "unlink "DFID"/"DNAME"", PFID(rr->rr_fid1),
@@ -881,6 +883,12 @@ static int mdt_reint_unlink(struct mdt_thread_info *info,
 	if (!fid_is_md_operative(rr->rr_fid1))
 		RETURN(-EPERM);
 
+	/* set snapshot readonly flag */
+	if (ma->ma_attr_flags & MDS_SNAPSHOT) {
+		tgt_snapshot_set(info->mti_env);
+		snapshot_flag = 1;
+	}
+
         /*
 	 * step 1: Found the parent.
          */
@@ -1042,6 +1050,8 @@ static int mdt_reint_unlink(struct mdt_thread_info *info,
                 switch (ma->ma_attr.la_mode & S_IFMT) {
                 case S_IFDIR:
 			mdt_counter_incr(req, LPROC_MDT_RMDIR);
+			if (snapshot_flag == 1)
+				mdt_snapshot_counter_incr(SNAPSHOT_COUNT_RMDIR);
                         break;
                 case S_IFREG:
                 case S_IFLNK:
@@ -1050,6 +1060,9 @@ static int mdt_reint_unlink(struct mdt_thread_info *info,
                 case S_IFIFO:
                 case S_IFSOCK:
 			mdt_counter_incr(req, LPROC_MDT_UNLINK);
+			if (snapshot_flag == 1)
+				mdt_snapshot_counter_incr(
+					SNAPSHOT_COUNT_UNLINK);
                         break;
                 default:
                         LASSERTF(0, "bad file type %o unlinking\n",
@@ -1069,6 +1082,10 @@ unlock_parent:
 put_parent:
 	mdt_object_put(info->mti_env, mp);
 out:
+	/* clear snapshot readonly flag */
+	if (snapshot_flag == 1)
+		tgt_snapshot_clear(info->mti_env);
+
         return rc;
 }
 
@@ -2008,6 +2025,510 @@ static int mdt_reint_migrate(struct mdt_thread_info *info,
 	return mdt_reint_rename_or_migrate(info, lhc, MRL_MIGRATE);
 }
 
+/*
+ *  mdt_md_snapshot()
+ *
+ *  create snapshot object \a rr_fid2 named \a rr_name in directory \a rr_fid1
+ *
+ *  \param[in]	info	reint information
+ *
+ *  \retval	0	success
+ *  \retval	not 0	error code (-EPROTO if a regular file has no EADATA2)
+ */
+static int mdt_md_snapshot(struct mdt_thread_info *info)
+{
+	struct ptlrpc_request   *req = mdt_info_req(info);
+	struct mdt_device	*mdt = info->mti_mdt;
+	struct mdt_body		*repbody;
+	struct mdt_object	*snap_dir, *snap;
+	struct mdt_reint_record	*rr = &info->mti_rr;
+	struct mdt_lock_handle	*snap_dir_lh;
+	struct lu_fid		*snap_dir_fid = (struct lu_fid *)rr->rr_fid1;
+	struct lu_fid		*snap_fid = (struct lu_fid *)rr->rr_fid2;
+	struct lu_fid		*orig_fid = &info->mti_tmp_fid1;
+	struct lu_buf		*buf = &info->mti_buf;
+	struct lu_name		*snap_name = &rr->rr_name;
+	struct md_attr		*ma = &info->mti_attr;
+	int			rc, rc2;
+	ENTRY;
+
+	/* MEMO:
+	 * In info, follows are set by mdt_snapshot_unpack().
+	 * (1) mdt_ucred(info)
+	 *      uc->uc_fsuid		uid
+	 *      uc->uc_fsgid		gid
+	 *      uc->uc_cap		cap
+	 *      uc->uc_umask		umask
+	 * (2) info->mti_rr
+	 *      rr->rr_fid1		snapshot directory fid
+	 *      rr->rr_fid2		snapshot fid
+	 *      rr->rr_name		snapshot name
+	 * (3) info->mti_attr.ma_attr
+	 *      attr->la_mode		mode
+	 *      attr->la_uid		uid
+	 *      attr->la_gid		gid
+	 *      attr->la_atime		atime
+	 *      attr->la_mtime		mtime
+	 *      attr->la_ctime		ctime
+	 *      attr->la_valid		LA_MODE | LA_UID | LA_GID |
+	 *				LA_CTIME | LA_MTIME | LA_ATIME
+	 * (4) info->mti_spec		<empty>
+	 * (5) info->mti_tmp_fid1	original fid
+	 * (6) info->mti_dlm_req	ldlm cancel request
+	 */
+
+	DEBUG_REQ(D_INODE, req,
+		  "START TO CREATE SNAPSHOT ("DNAME"->"DFID") "
+		  "in "DFID" orig "DFID"",
+		  PNAME(snap_name), PFID(snap_fid),
+		  PFID(snap_dir_fid), PFID(orig_fid));
+
+	/* check snapshot parent directory FID */
+	if (!fid_is_md_operative(snap_dir_fid)) {
+		CERROR("check error snapshot parent directory FID "DFID"\n",
+		       PFID(snap_dir_fid));
+		GOTO(out, rc = -EPERM);
+	}
+	/* check snapshot orig FID */
+	if (!fid_is_md_operative(orig_fid) &&
+	    !fid_seq_is_dot(orig_fid->f_seq)) {
+		CERROR("check error snapshot orig directory FID "DFID"\n",
+		       PFID(orig_fid));
+		GOTO(out, rc = -EPERM);
+	}
+
+	/* get reply body */
+	repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
+
+	/* search snapshot parent directory */
+	snap_dir = mdt_object_find(info->mti_env, info->mti_mdt, snap_dir_fid);
+	if (IS_ERR(snap_dir)) {
+		CERROR("search error snapshot parent directory\n");
+		GOTO(out, rc = PTR_ERR(snap_dir));
+	}
+
+	/* test snapshot parent directory existence */
+	if (!mdt_object_exists(snap_dir)) {
+		CERROR("test error snapshot parent directory existance\n");
+		GOTO(out_free_snap_dir, rc = -ENOENT);
+	}
+
+	/* lock snapshot parent directory */
+	snap_dir_lh = &info->mti_lh[MDT_LH_PARENT];
+	mdt_lock_pdo_init(snap_dir_lh, LCK_PW, snap_name);
+	rc = mdt_object_lock(info, snap_dir, snap_dir_lh, MDS_INODELOCK_UPDATE,
+			     MDT_CROSS_LOCK);
+	if (rc) {
+		CERROR("lock error snapshot parent directory rc=%d\n", rc);
+		GOTO(out_free_snap_dir, rc);
+	}
+
+	if (!mdt_object_remote(snap_dir)) {
+		rc = mdt_version_get_check_save(info, snap_dir, 0);
+		if (rc) {
+			CERROR("version check error snapshot rc=%d\n",
+			       rc);
+			GOTO(out_unlock_snap_dir, rc);
+		}
+	}
+
+	fid_zero(&info->mti_tmp_fid2);
+	rc = mdo_lookup(info->mti_env, mdt_object_child(snap_dir),
+			snap_name, &info->mti_tmp_fid2,
+			&info->mti_spec);
+
+	if (rc == 0 && !req_is_replay(req))
+		GOTO(out_unlock_snap_dir, rc = -EEXIST);
+
+	if (rc != 0 && rc != -ENOENT) {
+		CERROR("lookup error snapshot rc=%d\n",
+		       rc);
+		GOTO(out_unlock_snap_dir, rc);
+	}
+
+	ma->ma_attr.la_flags |= MDT_SNAPSHOT_FILE_PATT;
+	/* get original attributes */
+	if (rc == -ENOENT) {
+		/* save version of file name for replay,
+		   it must be ENOENT here */
+		mdt_enoent_version_save(info, 1);
+
+		/* new object */
+		snap = mdt_object_new(info->mti_env, mdt, snap_fid);
+		if (IS_ERR(snap)) {
+			rc = PTR_ERR(snap);
+			mdt_create_pack_capa(info, rc, NULL, repbody);
+			CERROR("does not create new object rc=%d\n", rc);
+			GOTO(out_unlock_snap_dir, rc);
+		}
+
+		rc = mdt_remote_permission(info, snap_dir, snap);
+		if (rc != 0) {
+			CERROR("remote permission error rc=%d\n", rc);
+			GOTO(out_free_snap, rc);
+		}
+
+		/* capa for cross-ref will be stored here */
+		ma->ma_capa = req_capsule_server_get(info->mti_pill,
+								&RMF_CAPA1);
+		LASSERT(ma->ma_capa);
+
+		/* Version of child will be updated on disk. */
+		tgt_vbr_obj_set(info->mti_env, mdt_obj2dt(snap));
+		rc = mdt_version_get_check_save(info, snap, 2);
+		if (rc) {
+			CERROR("version of child check error rc=%d\n", rc);
+			GOTO(out_free_snap, rc);
+		}
+
+		/* Let lower layer know current lock mode. */
+		info->mti_spec.sp_cr_mode =
+			mdt_dlm_mode2mdl_mode(snap_dir_lh->mlh_pdo_mode);
+
+		/*
+		 * Do not perform lookup sanity check. We know that name does
+		 * not exist.
+		 */
+		info->mti_spec.sp_cr_lookup = 0;
+		info->mti_spec.sp_feat = &dt_directory_features;
+
+		/* create snapshot object */
+		ma->ma_attr.la_valid |= LA_FLAGS;
+		ma->ma_valid = 0;
+		rc = mdo_create(info->mti_env,
+				mdt_object_child(snap_dir),
+				snap_name,
+				mdt_object_child(snap),
+				&info->mti_spec, ma);
+		if (rc) {
+			CERROR("create error snapshot object rc=%d\n", rc);
+			GOTO(out_free_snap, rc);
+		}
+		ma->ma_need = MA_INODE;
+		rc = mdt_attr_get_complex(info, snap, ma);
+		if (rc) {
+			CERROR("get inode attr error snapshot object rc=%d\n",
+			       rc);
+			GOTO(out_free_snap, rc);
+		}
+	} else {
+		snap = mdt_object_find(info->mti_env, mdt, snap_fid);
+		if (IS_ERR(snap)) {
+			rc = PTR_ERR(snap);
+			CERROR("find error snapshot object rc=%d\n", rc);
+			GOTO(out_unlock_snap_dir, rc);
+		}
+		ma->ma_need = MA_INODE;
+		rc = mo_attr_get(info->mti_env,
+					 mdt_object_child(snap), ma);
+		if (rc) {
+			CERROR("get attribute error snapshot"
+			       " object rc=%d\n", rc);
+			GOTO(out_delete_snap, rc);
+		}
+		if (!(ma->ma_attr.la_flags & LUSTRE_SNAPSHOT_FL)) {
+			CERROR("invalid snapshot inode flags\n");
+			rc = -EFAULT;
+			GOTO(out_delete_snap, rc);
+		}
+		ma->ma_valid = 0;
+		rc = mdt_stripe_get(info, snap,
+				    ma, XATTR_NAME_LOV);
+		if (rc == 0)
+			goto done_set_xattr;
+		ma->ma_valid |= MA_INODE;
+	}
+
+	/* return fid & attr to client */
+	mdt_pack_attr2body(info, repbody, &ma->ma_attr,
+			   mdt_object_fid(snap));
+
+	/* set lov attribute */
+	if (ma->ma_lmm_size > 0) {
+		buf->lb_buf = ma->ma_lmm;
+		buf->lb_len = ma->ma_lmm_size;
+		rc = mo_xattr_set(info->mti_env, mdt_object_child(snap),
+				  buf, XATTR_NAME_LOV, LU_XATTR_SNAPSHOT);
+		if (rc < 0) {
+			CERROR("set lov attribute error rc=%d\n", rc);
+			GOTO(out_delete_snap, rc);
+		}
+	}
+done_set_xattr:
+	/* file snapshot */
+	if (S_ISREG(ma->ma_attr.la_mode)) {
+		buf->lb_buf = (void *)rr->rr_eadata;
+		buf->lb_len = rr->rr_eadatalen;
+		/* EADATA2 comes from the client: reject it, do not assert */
+		if (rr->rr_eadata == NULL || rr->rr_eadatalen <= 0)
+			GOTO(out_delete_snap, rc = -EPROTO);
+		/* call ost for snapshot clone on regular file */
+		rc = mo_snapshot_clone(info->mti_env,
+				       mdt_object_child(snap),
+				       ma, buf);
+		if (rc) {
+			CERROR("snapshot clone error rc=%d\n", rc);
+			GOTO(out_delete_snap, rc);
+		}
+	}
+
+	mdt_create_pack_capa(info, rc, snap, repbody);
+	if (S_ISREG(ma->ma_attr.la_mode)) {
+		ma->ma_lmm = req_capsule_server_get(info->mti_pill,
+						    &RMF_MDT_MD);
+		ma->ma_lmm_size = req_capsule_get_size(info->mti_pill,
+						       &RMF_MDT_MD,
+						       RCL_SERVER);
+		if (ma->ma_lmm_size < 0)
+			GOTO(out_delete_snap, rc = -EFAULT);
+
+		ma->ma_valid = 0;
+		ma->ma_need = MA_LOV;
+		rc = mdt_attr_get_complex(info, snap, ma);
+		if (rc) {
+			CERROR("get lov  error rc=%d\n",
+			       rc);
+			GOTO(out_delete_snap, rc);
+		}
+		LASSERT(ma->ma_valid & MA_LOV);
+		repbody->mbo_eadatasize = ma->ma_lmm_size;
+		repbody->mbo_valid |= OBD_MD_FLEASIZE;
+	}
+
+	/* free snapshot object */
+	mdt_object_put(info->mti_env, snap);
+
+	/* unlock & free snapshot directory */
+	mdt_object_unlock_put(info, snap_dir, snap_dir_lh, 0);
+
+	RETURN(0);
+
+out_delete_snap:
+	ma->ma_need = 0;
+	ma->ma_valid = 0;
+	rc2 = mdo_unlink(info->mti_env,
+			mdt_object_child(snap_dir),
+			mdt_object_child(snap),
+			snap_name, ma, 0);
+	if (rc2 != 0)
+		CERROR("failed to cleanup of create snapshot: "
+		       "rc = %d\n", rc2);
+
+out_free_snap:
+	/* free snapshot object */
+	mdt_create_pack_capa(info, rc, snap, repbody);
+	mdt_object_put(info->mti_env, snap);
+out_unlock_snap_dir:
+	/* unlock snap parent directory */
+	mdt_object_unlock(info, snap_dir, snap_dir_lh, rc);
+out_free_snap_dir:
+	/* free snap parent directory */
+	mdt_object_put(info->mti_env, snap_dir);
+
+out:
+	if (rc != -EEXIST)
+		DEBUG_REQ(D_ERROR, req,
+			  "failed to create snapshot rc=%d "
+			  "("DNAME"->"DFID") "
+			  "in "DFID" orig "DFID"",
+			  rc, PNAME(snap_name), PFID(snap_fid),
+			  PFID(snap_dir_fid), PFID(orig_fid));
+	RETURN(rc);
+}
+
+/*
+ *  mdt_md_snapshot_xattr()
+ *
+ *  set one extended attribute (name mti_eaname, value ma_lmm) on a snapshot
+ *
+ *  \param[in]	info	reint information
+ *
+ *  \retval	0	success
+ *  \retval	not 0	error code
+ */
+static int mdt_md_snapshot_xattr(struct mdt_thread_info *info)
+{
+	struct mdt_object	*snap;
+	struct mdt_reint_record	*rr = &info->mti_rr;
+	struct mdt_lock_handle	*snap_lh;
+	struct lu_fid		*snap_dir_fid = (struct lu_fid *)rr->rr_fid1;
+	struct lu_fid		*snap_fid = (struct lu_fid *)rr->rr_fid2;
+	struct lu_buf		*buf = &info->mti_buf;
+	struct lu_name		*snap_name = &rr->rr_name;
+	struct md_attr		*ma = &info->mti_attr;
+	__u64                   lockpart;
+	int			rc;
+	ENTRY;
+
+	DEBUG_REQ(D_INODE, mdt_info_req(info),
+		  "START TO SET XATTR SNAPSHOT ("DNAME"->"DFID") "
+		  "in "DFID"",
+		  PNAME(snap_name), PFID(snap_fid),
+		  PFID(snap_dir_fid));
+
+	/* check the FID of the snapshot object the xattr is set on */
+	if (!fid_is_md_operative(snap_fid)) {
+		CERROR("check error snapshot parent directory FID\n");
+		RETURN(-EPERM);
+	}
+
+	rc = mdt_init_ucred_reint(info);
+	if (rc != 0)
+		RETURN(rc);
+
+	lockpart = MDS_INODELOCK_UPDATE;
+	/* Revoke all clients' lookup lock, since the access
+	 * permissions for this inode is changed when ACL_ACCESS is
+	 * set. This isn't needed for ACL_DEFAULT, since that does
+	 * not change the access permissions of this inode, nor any
+	 * other existing inodes. It is setting the ACLs inherited
+	 * by new directories/files at create time. */
+	/* We need revoke both LOOKUP|PERM lock here, see mdt_attr_set. */
+	if (!strcmp(info->mti_eaname.ln_name, XATTR_NAME_ACL_ACCESS))
+		lockpart |= MDS_INODELOCK_PERM | MDS_INODELOCK_LOOKUP;
+	/* We need to take the lock on behalf of old clients so that newer
+	 * clients flush their xattr caches */
+	else
+		lockpart |= MDS_INODELOCK_XATTR;
+
+	snap_lh = &info->mti_lh[MDT_LH_PARENT];
+	/* ACLs were sent to clients under LCK_CR locks, so taking LCK_EX
+	 * to cancel them. */
+	mdt_lock_reg_init(snap_lh, LCK_EX);
+	snap = mdt_object_find_lock(info, snap_fid, snap_lh, lockpart);
+	if (IS_ERR(snap)) {
+		CERROR("lock error snapshot rc=%d\n", (int)PTR_ERR(snap));
+		GOTO(out, rc = PTR_ERR(snap));
+	}
+	if (!mdt_object_exists(snap)) {
+		CERROR("snapshot object not exist\n");
+		GOTO(out_unlock_snap, rc = -ENOENT);
+	}
+	tgt_vbr_obj_set(info->mti_env, mdt_obj2dt(snap));
+	rc = mdt_version_get_check_save(info, snap, 0);
+	if (rc) {
+		CERROR("version check error snapshot rc=%d\n", rc);
+		GOTO(out_unlock_snap, rc);
+	}
+
+	/* set extended attribute; skipped when name or value is empty */
+	if (info->mti_eaname.ln_namelen && ma->ma_lmm_size > 0) {
+		buf->lb_buf = ma->ma_lmm;
+		buf->lb_len = ma->ma_lmm_size;
+		rc = mo_xattr_set(info->mti_env, mdt_object_child(snap),
+			  buf, info->mti_eaname.ln_name, 0);
+		if (rc < 0) {
+			CERROR("set external attribute error rc=%d\n", rc);
+			GOTO(out_unlock_snap, rc);
+		}
+	}
+out_unlock_snap:
+	/* unlock and release the snapshot object */
+	mdt_object_unlock_put(info, snap, snap_lh, rc);
+
+	if (rc)
+		DEBUG_REQ(D_ERROR, mdt_info_req(info),
+			  "failed to setxattr snapshot rc=%d "
+			  "("DNAME"->"DFID") "
+			  "in "DFID"",
+			  rc, PNAME(snap_name), PFID(snap_fid),
+			  PFID(snap_dir_fid));
+out:
+	mdt_exit_ucred(info);
+	RETURN(rc);
+}
+
+/*
+ *  mdt_reint_snapshot()
+ *
+ *  REINT_SNAPSHOT handler: create a snapshot object or set an xattr on one
+ *
+ *  \param[in]	info	reint information
+ *  \param[in]	lhc	lock handle
+ *
+ *  \retval	0	success
+ *  \retval	not 0	error code
+ */
+int mdt_reint_snapshot(struct mdt_thread_info *info,
+			struct mdt_lock_handle *lhc)
+{
+	struct ptlrpc_request	*req = mdt_info_req(info);
+	int			ope, snap_ope;
+	int			rc;
+	ENTRY;
+
+	CDEBUG(D_TRACE, "START TO CREATE SNAPSHOT\n");
+
+	/* paired with tgt_snapshot_clear() on both exits below */
+	tgt_snapshot_set(info->mti_env);
+
+	/* cancel ldlm lock */
+	if (info->mti_dlm_req)
+		ldlm_request_cancel(req, info->mti_dlm_req, 0);
+
+	/* check protocol */
+	if (!lu_name_is_valid(&info->mti_rr.rr_name)) {
+		CERROR("protocol error\n");
+		GOTO(out, rc = -EPROTO);
+	}
+
+	/* check if readonly */
+	if (exp_connect_flags(req->rq_export) & OBD_CONNECT_RDONLY) {
+		CERROR("readonly filesystem\n");
+		GOTO(out, rc = -EROFS);
+	}
+
+	switch (info->mti_attr.ma_attr.la_mode & S_IFMT) {
+	case S_IFDIR:
+		ope = LPROC_MDT_MKDIR;
+		snap_ope = SNAPSHOT_COUNT_MKDIR;
+		break;
+	case S_IFREG:
+		/* regular-file snapshots are accounted as mknod */
+		ope = LPROC_MDT_MKNOD;
+		snap_ope = SNAPSHOT_COUNT_MKNOD;
+		break;
+	case S_IFLNK:
+	case S_IFCHR:
+	case S_IFBLK:
+	case S_IFIFO:
+	case S_IFSOCK:
+	default:
+		CERROR("%s: Unsupported mode %o\n",
+		       mdt_obd_name(info->mti_mdt),
+		       info->mti_attr.ma_attr.la_mode);
+		GOTO(out, rc = err_serious(-EOPNOTSUPP));
+	}
+
+	if (info->mti_eaname.ln_namelen == 0) {
+		/* no xattr name in the request: create the snapshot object */
+		mdt_counter_incr(req, ope);
+		mdt_snapshot_counter_incr(snap_ope);
+		rc = mdt_md_snapshot(info);
+		if (rc)
+			GOTO(out, rc);
+	} else {
+		/* xattr name present: set that xattr on the snapshot */
+		mdt_counter_incr(req, LPROC_MDT_SETXATTR);
+		mdt_snapshot_counter_incr(SNAPSHOT_COUNT_SETXATTR);
+		rc = mdt_md_snapshot_xattr(info);
+		if (rc)
+			GOTO(out, rc);
+	}
+
+	/* success exit: undo tgt_snapshot_set() */
+	tgt_snapshot_clear(info->mti_env);
+
+	RETURN(0);
+out:
+	lustre_msg_set_transno(req->rq_repmsg, 0);
+
+	/* error exit: undo tgt_snapshot_set() */
+	tgt_snapshot_clear(info->mti_env);
+
+	RETURN(rc);
+}
+
 typedef int (*mdt_reinter)(struct mdt_thread_info *info,
                            struct mdt_lock_handle *lhc);
 
@@ -2021,6 +2542,7 @@ static mdt_reinter reinters[REINT_MAX] = {
 	[REINT_SETXATTR] = mdt_reint_setxattr,
 	[REINT_RMENTRY]  = mdt_reint_unlink,
 	[REINT_MIGRATE]   = mdt_reint_migrate,
+	[REINT_SNAPSHOT]   = mdt_reint_snapshot,
 };
 
 int mdt_reint_rec(struct mdt_thread_info *info,
diff --git a/lustre/mdt/mdt_snapshot.c b/lustre/mdt/mdt_snapshot.c
new file mode 100644
index 0000000..b07474d
--- /dev/null
+++ b/lustre/mdt/mdt_snapshot.c
@@ -0,0 +1,79 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License version 2 for more details.  A copy is
+ * included in the COPYING file that accompanied this code.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ *   Copyright(c) 2016-2017 FUJITSU LIMITED.
+ *   All rights reserved.
+ */
+
+#include "mdt_internal.h"
+
+static DEFINE_SPINLOCK(ss_count_lock);
+static __u64 ss_count[SNAPSHOT_COUNT_LAST];
+
+/*
+ *  mdt_snapshot_counter_init
+ *
+ *  reset every snapshot mdt counter to zero; the counters and their lock
+ *  are file-scope, so they are shared by all callers in this module
+ */
+void mdt_snapshot_counter_init(void)
+{
+	int i;
+
+	/* The lock is initialized statically: mdt_procfs_init() calls this
+	 * with a mdt_device, so it can run again while another thread
+	 * holds the lock, and spin_lock_init() would then corrupt it. */
+	spin_lock(&ss_count_lock);
+	for (i = 0; i < SNAPSHOT_COUNT_LAST; i++)
+		ss_count[i] = 0;
+	spin_unlock(&ss_count_lock);
+}
+
+/*
+ *  mdt_snapshot_counter_incr
+ *
+ *  add one to ss_count[op] under ss_count_lock (op is not range-checked)
+ */
+inline void mdt_snapshot_counter_incr(int op)
+{
+	/* count up value */
+	spin_lock(&ss_count_lock);
+	ss_count[op]++;
+	spin_unlock(&ss_count_lock);
+
+	return;
+}
+
+/*
+ *  mdt_snapshot_counter_get
+ *
+ *  read ss_count[op]; ss_count_lock is not taken for the read
+ *
+ *  \param[in]	op		operation code (index, not range-checked)
+ *
+ *  \retval	0 and more	count
+ */
+inline __u64 mdt_snapshot_counter_get(int op)
+{
+	/* plain, unlocked load of the counter */
+	return ss_count[op];
+}
diff --git a/lustre/obdclass/lprocfs_status_server.c b/lustre/obdclass/lprocfs_status_server.c
index 3ba8e70..a2975a0 100644
--- a/lustre/obdclass/lprocfs_status_server.c
+++ b/lustre/obdclass/lprocfs_status_server.c
@@ -668,8 +668,12 @@ void lprocfs_init_ops_stats(int num_private_stats, struct lprocfs_stats *stats)
 	LPROCFS_OBD_OP_INIT(num_private_stats, stats, pool_del);
 	LPROCFS_OBD_OP_INIT(num_private_stats, stats, getref);
 	LPROCFS_OBD_OP_INIT(num_private_stats, stats, putref);
+	LPROCFS_OBD_OP_INIT(num_private_stats, stats, snapshot_lock);
+	LPROCFS_OBD_OP_INIT(num_private_stats, stats, snapshot_unlock);
+	LPROCFS_OBD_OP_INIT(num_private_stats, stats, snapshot_get_info);
+	LPROCFS_OBD_OP_INIT(num_private_stats, stats, snapshot_cancel_lock);
 
-	CLASSERT(NUM_OBD_STATS == OBD_COUNTER_OFFSET(putref) + 1);
+	CLASSERT(NUM_OBD_STATS == OBD_COUNTER_OFFSET(snapshot_cancel_lock) + 1);
 }
 EXPORT_SYMBOL(lprocfs_init_ops_stats);
 
diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c
index 774ec02..9b036ee 100644
--- a/lustre/ofd/ofd_dev.c
+++ b/lustre/ofd/ofd_dev.c
@@ -52,6 +52,9 @@
 
 #include "ofd_internal.h"
 
+#include <lustre_snapshot.h>
+#include <libcfs/libcfs.h>
+
 /* Slab for OFD object allocation */
 static struct kmem_cache *ofd_object_kmem;
 
@@ -66,6 +69,11 @@ static struct lu_kmem_descr ofd_caches[] = {
 	}
 };
 
+#define SNAP_PRECREATE_RETRY_MAX (10)
+static int snap_precreate_retry = SNAP_PRECREATE_RETRY_MAX;
+CFS_MODULE_PARM(snap_precreate_retry, "i", int, 0644,
+		"maximum of retry precreate for snapshot");
+
 static int ofd_connect_to_next(const struct lu_env *env, struct ofd_device *m,
 			       const char *next, struct obd_export **exp)
 {
@@ -742,6 +750,7 @@ int ofd_fiemap_get(const struct lu_env *env, struct ofd_device *ofd,
 {
 	struct ofd_object	*fo;
 	int			 rc;
+	void *lock = NULL;
 
 	fo = ofd_object_find(env, ofd, fid);
 	if (IS_ERR(fo)) {
@@ -750,12 +759,22 @@ int ofd_fiemap_get(const struct lu_env *env, struct ofd_device *ofd,
 		return PTR_ERR(fo);
 	}
 
+	rc = dt_snapshot_lock(ofd_object_child(fo), false, &lock);
+	if (rc) {
+		CERROR("fail to lock snapshot. err=%d\n",
+		       rc);
+		ofd_object_put(env, fo);
+		return rc;
+	}
+
 	ofd_read_lock(env, fo);
 	if (ofd_object_exists(fo))
 		rc = dt_fiemap_get(env, ofd_object_child(fo), fiemap);
 	else
 		rc = -ENOENT;
 	ofd_read_unlock(env, fo);
+	if (lock)
+		dt_snapshot_unlock(ofd_object_child(fo), lock);
 	ofd_object_put(env, fo);
 	return rc;
 }
@@ -1167,7 +1186,7 @@ static int ofd_orphans_destroy(const struct lu_env *env,
 		if (unlikely(rc != 0))
 			GOTO(out_put, rc);
 
-		rc = ofd_destroy_by_fid(env, ofd, fid, 1);
+		rc = ofd_destroy_by_fid(env, ofd, fid, OST_DESTRY_ORPHAN);
 		if (rc != 0 && rc != -ENOENT && rc != -ESTALE &&
 		    likely(rc != -EREMCHG && rc != -EINPROGRESS))
 			/* this is pretty fatal... */
@@ -1485,13 +1504,14 @@ static int ofd_destroy_hdl(struct tgt_session_info *tsi)
 	else
 		count = 1; /* default case - single destroy */
 
-	CDEBUG(D_HA, "%s: Destroy object "DOSTID" count %d\n", ofd_name(ofd),
-	       POSTID(&body->oa.o_oi), count);
+	CDEBUG(D_HA, "%s: Destroy object "DOSTID" "DFID" count %d\n",
+		ofd_name(ofd), POSTID(&body->oa.o_oi),  PFID(fid), count);
 
 	while (count > 0) {
 		int lrc;
 
-		lrc = ofd_destroy_by_fid(tsi->tsi_env, ofd, fid, 0);
+		lrc = ofd_destroy_by_fid(tsi->tsi_env, ofd, fid,
+						OST_DESTRY_NORMAL);
 		if (lrc == -ENOENT) {
 			CDEBUG(D_INODE,
 			       "%s: destroying non-existent object "DFID"\n",
@@ -1695,6 +1715,270 @@ out:
 	return rc;
 }
 
+/**
+ *  ofd_snapshot_clone()
+ *
+ *  Snapshot clone handler in OFD
+ *
+ *  \param[in]	tsi		target session information
+ *
+ *  \retval	0		success
+ *  \retval	less than 0	failure (-errno)
+ */
+static int ofd_snapshot_clone(struct tgt_session_info *tsi)
+{
+	const struct lu_env	*env = tsi->tsi_env;
+	struct ofd_thread_info	*fti = tsi2ofd_info(tsi);
+	struct ost_body		*req_body = tsi->tsi_ost_body;
+	struct ost_body		*rep_body;
+	struct lu_fid		*snap_fid, orig_fid;
+	struct ofd_device	*ofd = ofd_exp(tsi->tsi_exp);
+	struct ofd_object	*snap_obj, *orig_obj;
+	struct dt_object	*snap_osd, *orig_osd;
+	struct ldlm_res_id	resid;
+	struct filter_fid       *ff = NULL;
+	struct lustre_handle	lh = {0};
+	__u64			flags = 0;
+	void                    *lock = NULL;
+	struct thandle          *th;
+	int			rc;
+	ENTRY;
+
+	/* check request body */
+	LASSERT(req_body != NULL);
+
+	snap_fid = &req_body->oa.o_oi.oi_fid;
+
+	/* get reply body */
+	rep_body = req_capsule_server_get(tsi->tsi_pill, &RMF_OST_BODY);
+	if (rep_body == NULL) {
+		CERROR("fail to get reply body\n");
+		GOTO(out, rc = -ENOMEM);
+	}
+	orig_fid.f_seq = req_body->oa.o_snapshot_orig_seq;
+	orig_fid.f_oid = req_body->oa.o_snapshot_orig_oid;
+	orig_fid.f_ver = req_body->oa.o_snapshot_orig_ver;
+
+	/* set reply body */
+	rep_body->oa.o_oi.oi_fid = *snap_fid;
+	rep_body->oa.o_valid = OBD_MD_FLID;
+
+	/* lock original object by ldlm */
+	ost_fid_build_resid(&orig_fid, &resid);
+	rc = tgt_extent_lock(tsi->tsi_tgt->lut_obd->obd_namespace, &resid,
+				0, OBD_OBJECT_EOF, &lh, LCK_PW, &flags);
+	if (rc) {
+		CERROR("fail to lock snapshot original. err=%d\n", rc);
+		GOTO(out, rc);
+	}
+
+	/* copied from ofd_preprw_write() */
+	if (unlikely(tsi->tsi_exp->exp_obd->obd_recovering) ||
+	    (lustre_msg_get_flags(tgt_ses_req(tsi)->rq_reqmsg)
+	     & (MSG_RESENT | MSG_REPLAY))) {
+		obd_seq seq = fid_seq(snap_fid);
+		obd_id  oid = fid_oid(snap_fid);
+		struct ofd_seq *oseq;
+		int retry = 0;
+
+retry_seq_load:
+		oseq = ofd_seq_load(env, ofd, seq);
+		if (IS_ERR(oseq)) {
+			CERROR("%s: Can't find FID Sequence "LPX64": rc = %d\n",
+			       ofd_name(ofd), seq, (int)PTR_ERR(oseq));
+			rc = PTR_ERR(oseq);
+			if (rc == -ENOMEM &&
+			    retry++ < snap_precreate_retry) {
+				schedule_timeout_and_set_state(
+						     TASK_INTERRUPTIBLE,
+						     cfs_time_seconds(1));
+				CDEBUG(D_INODE, "retry ofd_seq_load"
+				       " (%d / %d)\n",
+				       retry, snap_precreate_retry);
+				GOTO(retry_seq_load , rc);
+			}
+			GOTO(out_unlock_orig_ldlm, rc);
+		}
+
+		if (oid > ofd_seq_last_oid(oseq)) {
+			int sync = 0;
+			int diff;
+			CDEBUG(D_INODE, "oid("LPX64") > last_oid("LPX64")\n",
+			       oid, ofd_seq_last_oid(oseq));
+			mutex_lock(&oseq->os_create_lock);
+			diff = oid - ofd_seq_last_oid(oseq);
+
+			/* Do sync create if the seq is about to be used up */
+			if (fid_seq_is_idif(seq) || fid_seq_is_mdt0(seq)) {
+				if (unlikely(oid >= IDIF_MAX_OID - 1))
+					sync = 1;
+			} else if (fid_seq_is_norm(seq)) {
+				if (unlikely(oid >=
+					     LUSTRE_DATA_SEQ_MAX_WIDTH - 1))
+					sync = 1;
+			} else {
+				CERROR("%s : invalid o_seq "DOSTID"\n",
+				       ofd_name(ofd),
+				       POSTID(&req_body->oa.o_oi));
+				mutex_unlock(&oseq->os_create_lock);
+				ofd_seq_put(env, oseq);
+				GOTO(out_unlock_orig_ldlm, rc = -EINVAL);
+			}
+
+			while (diff > 0) {
+				obd_id next_id = ofd_seq_last_oid(oseq) + 1;
+				int count = ofd_precreate_batch(ofd, diff);
+
+retry_precreate:
+				rc = ofd_precreate_objects(env, ofd, next_id,
+							   oseq, count, sync);
+				if (rc == -ENOMEM &&
+				    retry++ < snap_precreate_retry) {
+					schedule_timeout_and_set_state(
+							   TASK_INTERRUPTIBLE,
+							   cfs_time_seconds(1));
+					CDEBUG(D_INODE, "retry"
+					       " precreate_objects"
+					       " (%d / %d)\n",
+					       retry, snap_precreate_retry);
+					GOTO(retry_precreate , rc);
+				}
+				if (rc < 0) {
+					mutex_unlock(&oseq->os_create_lock);
+					ofd_seq_put(env, oseq);
+					GOTO(out_unlock_orig_ldlm, rc);
+				}
+
+				diff -= rc;
+			}
+
+			mutex_unlock(&oseq->os_create_lock);
+		}
+
+		ofd_seq_put(env, oseq);
+	}
+
+	/* get snapshot object */
+	snap_obj = ofd_object_find_exists(env, ofd, snap_fid);
+	if (IS_ERR(snap_obj)) {
+		rc = PTR_ERR(snap_obj);
+		CERROR("fail to find snapshot object. "
+		       ""DFID" err=%d\n", PFID(snap_fid), rc);
+		GOTO(out_unlock_orig_ldlm, rc);
+	}
+
+	rc = ofd_attr_get(env, snap_obj, &fti->fti_attr);
+	if (rc) {
+		GOTO(out_free_snap_obj, rc);
+	} else if ((fti->fti_attr.la_flags & SNAPSHOT_FLAGS_MASK) ==
+		   OST_SNAPSHOT_FILE_PATT) {
+		/* processing has been completed already,
+		 * so there is nothing to do. */
+		GOTO(out_free_snap_obj, rc = 0);
+	}
+
+	/* get snapshot osd object */
+	snap_osd = ofd_object_child(snap_obj);
+
+	/* get original ofd object */
+	orig_obj = ofd_object_find_exists(env, ofd, &orig_fid);
+	if (IS_ERR(orig_obj)) {
+		rc = PTR_ERR(orig_obj);
+		CERROR("cannot find snapshot original. "
+		       ""DFID" err=%d\n", PFID(&orig_fid), rc);
+		GOTO(out_free_snap_obj, rc);
+	}
+
+	/* get original osd object */
+	orig_osd = ofd_object_child(orig_obj);
+
+	/* get snapshot attributes from request body */
+	la_from_obdo(&fti->fti_attr, &req_body->oa,
+		     req_body->oa.o_valid);
+
+	if (req_body->oa.o_valid & OBD_MD_FLFID) {
+		ff = &fti->fti_mds_fid;
+		ofd_prepare_fidea(ff, &req_body->oa);
+	}
+	/* set snapshot attributes */
+	/* not necessary to protect it with snapshot_lock */
+	rc = dt_snapshot_lock(orig_osd, true, &lock);
+	if (rc) {
+		CERROR("fail to lock snapshot. err=%d\n", rc);
+		GOTO(out_free_orig_obj, rc);
+	}
+	ofd_read_lock(env, orig_obj);
+	rc = ofd_write_attr_set(env, ofd, snap_obj,
+					&fti->fti_attr, ff,
+					1 /* is_snapshot */);
+	if (rc) {
+		CERROR("fail to set attributes. err=%d\n", rc);
+		/* read lock and snapshot lock are held: release both */
+		GOTO(out_read_unlock, rc);
+	}
+
+	/* set snapshot attribute to reply body */
+	obdo_from_la(&rep_body->oa, &fti->fti_attr,
+		     LA_ATIME | LA_MTIME | LA_CTIME |
+		     LA_MODE | LA_UID | LA_GID);
+
+	th = ofd_trans_create(env, ofd);
+	if (IS_ERR(th)) {
+		rc = PTR_ERR(th);
+		CERROR("fail to trans_create. err=%d\n", rc);
+		GOTO(out_read_unlock, rc);
+	}
+
+	/* call osd in ost */
+	rc = dt_osd_declare_snapshot_clone(env, snap_osd, orig_osd,
+					   th, req_body->oa.o_flags);
+	if (rc) {
+		CERROR("fail to declare_snapshot_clone. err=%d\n", rc);
+		GOTO(out_stop, rc);
+	}
+
+	rc = ofd_trans_start(env, ofd, snap_obj, th);
+	if (rc) {
+		CERROR("fail to trans_start. err=%d\n", rc);
+		GOTO(out_stop, rc);
+	}
+	rc = dt_osd_snapshot_clone(snap_osd, orig_osd);
+
+out_stop:
+	ofd_trans_stop(env, ofd, th, rc);
+out_read_unlock:
+	/* unlock original object */
+	ofd_read_unlock(env, orig_obj);
+	dt_snapshot_unlock(orig_osd, lock);
+out_free_orig_obj:
+	/* free original object */
+	ofd_object_put(env, orig_obj);
+out_free_snap_obj:
+	/* free snapshot object */
+	ofd_object_put(env, snap_obj);
+out_unlock_orig_ldlm:
+	/* unlock original object by ldlm */
+	tgt_extent_unlock(&lh, LCK_PW);
+out:
+	if (rc == 0)
+		/* increment statistics */
+		ofd_counter_incr(tsi->tsi_exp,
+				 LPROC_OFD_STATS_CREATE,
+				 tsi->tsi_jobid,
+				 1);
+	else {
+		if (ff)
+			CERROR("snapshot clone failed."
+			       " mdt_fid="DFID" ost_fid="DFID""
+			       " orig_ost_fid="DFID"\n",
+			       PFID(&ff->ff_parent), PFID(snap_fid),
+			       PFID(&orig_fid));
+		else
+			CERROR("snapshot clone failed.\n");
+	}
+	RETURN(rc);
+}
+
 static int ofd_quotactl(struct tgt_session_info *tsi)
 {
 	struct obd_quotactl	*oqctl, *repoqc;
@@ -2047,6 +2331,8 @@ TGT_OST_HDL(0		| HABEO_REFERO,	OST_STATFS,	ofd_statfs_hdl),
 TGT_OST_HDL_HP(HABEO_CORPUS| HABEO_REFERO,
 					OST_BRW_READ,	tgt_brw_read,
 							ofd_hp_brw),
+TGT_OST_HDL(HABEO_CORPUS | HABEO_REFERO | MUTABOR,
+					OST_SNAPSHOT,   ofd_snapshot_clone),
 /* don't set CORPUS flag for brw_write because -ENOENT may be valid case */
 TGT_OST_HDL_HP(HABEO_CORPUS| MUTABOR,	OST_BRW_WRITE,	tgt_brw_write,
 							ofd_hp_brw),
diff --git a/lustre/ofd/ofd_dlm.c b/lustre/ofd/ofd_dlm.c
index 6337548..1a83ccb 100644
--- a/lustre/ofd/ofd_dlm.c
+++ b/lustre/ofd/ofd_dlm.c
@@ -41,6 +41,7 @@
 
 #define DEBUG_SUBSYSTEM S_FILTER
 
+#include <lustre_snapshot.h>
 #include "ofd_internal.h"
 
 struct ofd_intent_args {
@@ -107,6 +108,11 @@ int ofd_intent_policy(struct ldlm_namespace *ns, struct ldlm_lock **lockp,
 		[DLM_REPLY_REC_OFF]   = sizeof(*reply_lvb)
 	};
 	struct ldlm_glimpse_work	 gl_work;
+	struct ofd_device		*ofd;
+	struct ofd_object		*fo = NULL;
+	struct ofd_thread_info		*info;
+	struct lu_env			 env;
+	bool				 snapshot = false;
 	CFS_LIST_HEAD(gl_list);
 	ENTRY;
 
@@ -139,6 +145,39 @@ int ofd_intent_policy(struct ldlm_namespace *ns, struct ldlm_lock **lockp,
 			RETURN(ELDLM_LOCK_ABORTED);
 	}
 
+	/* copied from ofd_lvbo_update() */
+	ofd = ldlm_res_to_ns(res)->ns_lvbp;
+	LASSERT(ofd != NULL);
+
+	rc = lu_env_init(&env, LCT_DT_THREAD);
+	if (rc) {
+		/* if error occurred,
+		 * set "true" to snapshot to return ELDLM_LOCK_ABORTED
+		 */
+		snapshot = true;
+		goto skip;
+	}
+
+	info = ofd_info_init(&env, NULL);
+
+	ost_fid_from_resid(&info->fti_fid, &res->lr_name,
+			   ofd->ofd_lut.lut_lsd.lsd_osd_index);
+	fo = ofd_object_find(&env, ofd, &info->fti_fid);
+	if (IS_ERR(fo)) {
+		lu_env_fini(&env);
+		snapshot = true;
+		goto skip;
+	}
+	rc = ofd_attr_get(&env, fo, &info->fti_attr);
+	if (rc)
+		snapshot = true;
+	else if ((info->fti_attr.la_flags & SNAPSHOT_FLAGS_MASK) ==
+		 OST_SNAPSHOT_FILE_PATT)
+		snapshot = true;
+
+	ofd_object_put(&env, fo);
+	lu_env_fini(&env);
+skip:
 	LASSERT(ns == ldlm_res_to_ns(res));
 	lock_res(res);
 
@@ -161,7 +200,8 @@ int ofd_intent_policy(struct ldlm_namespace *ns, struct ldlm_lock **lockp,
 		 * list (and potentially being added to l_pending_list by an
 		 * AST) when we are going to drop this lock ASAP. */
 		if (lock->l_export->exp_libclient ||
-		    OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_GLIMPSE, 2)) {
+		    OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_GLIMPSE, 2) ||
+		    snapshot) {
 			ldlm_resource_unlink_lock(lock);
 			err = ELDLM_LOCK_ABORTED;
 		} else {
diff --git a/lustre/ofd/ofd_internal.h b/lustre/ofd/ofd_internal.h
index 934b109..bdf69f0 100644
--- a/lustre/ofd/ofd_internal.h
+++ b/lustre/ofd/ofd_internal.h
@@ -87,6 +87,13 @@ enum {
 	LPROC_OFD_STATS_LAST,
 };
 
+enum {
+	OST_DESTRY_NORMAL = 0,
+	OST_DESTRY_ORPHAN,
+	OST_DESTRY_SNAPSHOT_ORPHAN,
+	OST_DESTRY_LAST,
+};
+
 static inline void ofd_counter_incr(struct obd_export *exp, int opcode,
 				    char *jobid, long amount)
 {
@@ -377,6 +384,9 @@ void ofd_seqs_free(const struct lu_env *env, struct ofd_device *ofd);
 /* ofd_io.c */
 int ofd_start_inconsistency_verification_thread(struct ofd_device *ofd);
 int ofd_stop_inconsistency_verification_thread(struct ofd_device *ofd);
+int ofd_write_attr_set(const struct lu_env *env, struct ofd_device *ofd,
+			struct ofd_object *ofd_obj, struct lu_attr *la,
+			struct filter_fid *ff, int is_snapshot);
 int ofd_verify_ff(const struct lu_env *env, struct ofd_object *fo,
 		  struct obdo *oa);
 int ofd_preprw(const struct lu_env *env,int cmd, struct obd_export *exp,
diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c
index 3d7e60a..ef61a84 100644
--- a/lustre/ofd/ofd_io.c
+++ b/lustre/ofd/ofd_io.c
@@ -542,6 +542,7 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
 err:
 	dt_bufs_put(env, ofd_object_child(fo), lnb, *nr_local);
 	ofd_read_unlock(env, fo);
+	ofd_object_put(env, fo);
 	/* ofd_grant_prepare_write() was called, so we must commit */
 	ofd_grant_commit(env, exp, rc);
 out:
@@ -662,10 +663,10 @@ ofd_commitrw_read(const struct lu_env *env, struct ofd_device *ofd,
 	RETURN(0);
 }
 
-static int
+int
 ofd_write_attr_set(const struct lu_env *env, struct ofd_device *ofd,
 		   struct ofd_object *ofd_obj, struct lu_attr *la,
-		   struct filter_fid *ff)
+		   struct filter_fid *ff, int is_snapshot)
 {
 	struct ofd_thread_info	*info = ofd_info(env);
 	__u64			 valid = la->la_valid;
@@ -848,7 +849,7 @@ ofd_commitrw_write(const struct lu_env *env, struct obd_export *exp,
 	 * dt_declare_write_commit() since quota enforcement is now handled in
 	 * declare phases.
 	 */
-	rc = ofd_write_attr_set(env, ofd, fo, la, ff);
+	rc = ofd_write_attr_set(env, ofd, fo, la, ff, 0);
 	if (rc)
 		GOTO(out, rc);
 
diff --git a/lustre/ofd/ofd_obd.c b/lustre/ofd/ofd_obd.c
index 7b3fa77..a9f192a 100644
--- a/lustre/ofd/ofd_obd.c
+++ b/lustre/ofd/ofd_obd.c
@@ -48,6 +48,9 @@
 #include <lustre_ioctl.h>
 #include <lustre_quota.h>
 #include <lustre_lfsck.h>
+#include <lustre_snapshot.h>
+
+#define SNAPSHOT_ORPHAN_ARRAY_MAX 3
 
 static int ofd_export_stats_init(struct ofd_device *ofd,
 				 struct obd_export *exp, void *client_nid)
@@ -754,6 +757,121 @@ out:
 	return rc;
 }
 
+static int ofd_snapshot_destroy(const struct lu_env *env,
+				struct ofd_device *ofd,
+				struct ofd_object *fo, bool *is_snap)
+{
+	void                    *lock = NULL;
+	int rc = 0, i;
+	int array_num = SNAPSHOT_ORPHAN_ARRAY_MAX;
+	struct lu_fid *fid_array = NULL;
+	struct lu_fid orig_fid;
+	struct ofd_thread_info *info = ofd_info(env);
+	ENTRY;
+
+	/* if target is not snapshot_orig or snapshot,
+	 * dt_snapshot_lock() doesn't hold lock
+	 * and lock is NULL.
+	 */
+	rc = dt_snapshot_lock(ofd_object_child(fo), false, &lock);
+	if (rc) {
+		CERROR("fail to lock snapshot. err=%d\n", rc);
+		RETURN(rc);
+	}
+	if (!lock) {
+		*is_snap = false;
+		RETURN(0);
+	}
+	*is_snap = true;
+
+	rc = ofd_attr_get(env, fo, &info->fti_attr);
+	if (rc) {
+		CERROR("fail to get inode flags\n");
+		GOTO(unlock, rc);
+	}
+	info->fti_attr.la_valid = LA_FLAGS;
+	info->fti_attr.la_flags |= LUSTRE_SNAPSHOT_FL;
+
+	ofd_read_lock(env, fo);
+	rc = ofd_write_attr_set(env, ofd, fo, &info->fti_attr, NULL,
+				1 /* is_snapshot */);
+	ofd_read_unlock(env, fo);
+	if (rc) {
+		CERROR("fail to set del flag\n");
+		GOTO(unlock, rc);
+	}
+
+	OBD_ALLOC(fid_array, sizeof(struct lu_fid) * SNAPSHOT_ORPHAN_ARRAY_MAX);
+	if (fid_array == NULL) {
+		CERROR("fail to alloc fid_array\n");
+		GOTO(unlock, rc = -ENOMEM);
+	}
+
+	ofd_write_lock(env, fo);
+	if (!ofd_object_exists(fo)) {
+		OBD_FREE(fid_array,
+			 sizeof(struct lu_fid) * SNAPSHOT_ORPHAN_ARRAY_MAX);
+		ofd_write_unlock(env, fo);
+		GOTO(unlock, rc = -ENOENT);
+	}
+retry:
+	/* get list of orphan inode to delete */
+	rc = dt_snapshot_get_orphan(ofd_object_child(fo),
+				    fid_array, &array_num);
+	if ((rc == 0 || rc == -EAGAIN) && (array_num != 0)) {
+		for (i = 0; i < array_num; i++) {
+			ofd_destroy_by_fid(env,
+					   ofd, &fid_array[i],
+					   OST_DESTRY_SNAPSHOT_ORPHAN);
+		}
+	}
+	if (rc == -EAGAIN) {
+		memset(fid_array, 0, sizeof(struct lu_fid)
+					* SNAPSHOT_ORPHAN_ARRAY_MAX);
+		array_num = SNAPSHOT_ORPHAN_ARRAY_MAX;
+		goto retry;
+	}
+	OBD_FREE(fid_array, sizeof(struct lu_fid) * SNAPSHOT_ORPHAN_ARRAY_MAX);
+
+	rc = dt_snapshot_destroy(ofd_object_child(fo), &orig_fid);
+	if (rc < 0)
+		CERROR("fail to snapshot destroy. err=%d\n", rc);
+
+	ofd_write_unlock(env, fo);
+unlock:
+	dt_snapshot_unlock(ofd_object_child(fo), lock);
+
+	if (rc == 1) {
+		/* In the case of the last snapshot deletion,
+		 * get LCK_PW to notify client a change of i_blocks by
+		 * the deletion of snapshot_link */
+		struct lustre_handle     lockh;
+		ldlm_policy_data_t	 policy = {
+			.l_extent = { 0, OBD_OBJECT_EOF, 0 }
+		};
+		__u64                    flags = 0;
+		struct ldlm_res_id      res_id;
+
+		ost_fid_build_resid((const struct lu_fid *)&orig_fid, &res_id);
+		rc = ldlm_cli_enqueue_local(ofd->ofd_namespace, &res_id,
+					    LDLM_EXTENT,
+					    &policy, LCK_PW, &flags,
+					    ldlm_blocking_ast,
+					    ldlm_completion_ast,
+					    NULL, NULL, 0, LVB_T_NONE,
+					    NULL, &lockh);
+		if (rc == ELDLM_OK)
+			ldlm_lock_decref(&lockh, LCK_PW);
+		else
+			/* deletion processing continues
+			 * even if failed to get lock */
+			rc = 0;
+	}
+
+	RETURN(rc);
+}
+
+
 int ofd_destroy_by_fid(const struct lu_env *env, struct ofd_device *ofd,
 		       const struct lu_fid *fid, int orphan)
 {
@@ -762,9 +880,11 @@ int ofd_destroy_by_fid(const struct lu_env *env, struct ofd_device *ofd,
 	__u64			 flags = LDLM_FL_AST_DISCARD_DATA;
 	__u64			 rc = 0;
 	ldlm_policy_data_t	 policy = {
-					.l_extent = { 0, OBD_OBJECT_EOF }
+					.l_extent = { 0, OBD_OBJECT_EOF, 0 }
 				 };
 	struct ofd_object	*fo;
+	int err = 0;
+	bool is_snap = false;
 
 	ENTRY;
 
@@ -785,12 +905,17 @@ int ofd_destroy_by_fid(const struct lu_env *env, struct ofd_device *ofd,
 		ldlm_lock_decref(&lockh, LCK_PW);
 
 	LASSERT(fo != NULL);
-
-	rc = ofd_object_destroy(env, fo, orphan);
+	if (orphan != OST_DESTRY_SNAPSHOT_ORPHAN) {
+		err = ofd_snapshot_destroy(env, ofd, fo, &is_snap);
+		if (err)
+			GOTO(out, err);
+	}
+	err = ofd_object_destroy(env, fo, orphan);
 	EXIT;
-
+ out:
 	ofd_object_put(env, fo);
-	RETURN(rc);
+
+	RETURN(err);
 }
 
 /* needed by echo client only for now, RPC handler uses ofd_destroy_hdl() */
@@ -809,7 +934,7 @@ int ofd_echo_destroy(const struct lu_env *env, struct obd_export *exp,
 
 	CDEBUG(D_HA, "%s: Destroy object "DFID"\n", ofd_name(ofd), PFID(fid));
 
-	rc = ofd_destroy_by_fid(env, ofd, fid, 0);
+	rc = ofd_destroy_by_fid(env, ofd, fid, OST_DESTRY_NORMAL);
 	if (rc == -ENOENT) {
 		CDEBUG(D_INODE, "%s: destroying non-existent object "DFID"\n",
 		       ofd_name(ofd), PFID(fid));
@@ -996,6 +1121,77 @@ out:
 	return rc;
 }
 
+static int ofd_ioc_snapshot_orphan(const struct lu_env *env,
+				   struct ofd_device *ofd, void *karg)
+{
+	struct obd_ioctl_data *data = karg;
+	int		       rc = 0;
+	struct lu_buf *bufp, buf;
+
+	ENTRY;
+
+	if (data->ioc_plen1 && data->ioc_pbuf1) {
+		OBD_ALLOC(buf.lb_buf, data->ioc_plen1);
+		if (buf.lb_buf == NULL)
+			RETURN(-ENOMEM);
+		rc = copy_from_user(buf.lb_buf, data->ioc_pbuf1,
+						data->ioc_plen1);
+		if (rc) {
+			OBD_FREE(buf.lb_buf, data->ioc_plen1);
+			RETURN(-EFAULT);
+		}
+		buf.lb_len = data->ioc_plen1;
+		bufp = &buf;
+	} else
+		bufp = NULL;
+
+	if (data->ioc_command == OBD_IOC_SNAPSHOT_ORPHAN_INODE ||
+	    data->ioc_command == OBD_IOC_SNAPSHOT_ORPHAN_DEL) {
+		struct ofd_object     *fo = NULL;
+		struct lu_fid	       fid =
+			*(struct lu_fid *)data->ioc_inlbuf1;
+
+		if (!fid_is_sane(&fid))
+			GOTO(out, rc = -EBFONT);
+
+		fo = ofd_object_find(env, ofd, &fid);
+		if (IS_ERR(fo))
+			GOTO(out, rc = PTR_ERR(fo));
+
+		if (!ofd_object_exists(fo)) {
+			ofd_object_put(env, fo);
+			GOTO(out, rc = -ENOENT);
+		}
+		if (data->ioc_command == OBD_IOC_SNAPSHOT_ORPHAN_INODE) {
+			rc = dt_osd_snapshot_get_old_list(ofd_object_child(fo),
+							  (void *)bufp);
+		} else {
+			/* check if there is an old snapshot */
+			rc = dt_osd_snapshot_get_old_list(ofd_object_child(fo),
+							  NULL);
+			if (rc == 0)
+				rc = ofd_destroy_by_fid(env, ofd, &fid,
+							OST_DESTRY_ORPHAN);
+		}
+		ofd_object_put(env, fo);
+	} else {
+		if (!bufp)
+			GOTO(out, rc = -EFAULT);
+		rc = dt_snapshot_list_orphan(ofd->ofd_osd, (void *)bufp);
+	}
+	if (bufp) { /* -EAGAIN has valid return data */
+		int rc2;
+		rc2 = obd_ioctl_popdata(data->ioc_pbuf1, buf.lb_buf,
+							data->ioc_plen1);
+		if (rc2 != 0)
+			rc = rc2;
+	}
+out:
+	if (bufp)
+		OBD_FREE(buf.lb_buf, data->ioc_plen1);
+	RETURN(rc);
+}
+
 int ofd_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
 		  void *karg, void *uarg)
 {
@@ -1057,6 +1253,9 @@ int ofd_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
 	case OBD_IOC_GET_OBJ_VERSION:
 		rc = ofd_ioc_get_obj_version(&env, ofd, karg);
 		break;
+	case OBD_IOC_SNAPSHOT_ORPHAN:
+		rc = ofd_ioc_snapshot_orphan(&env, ofd, karg);
+		break;
 	default:
 		CERROR("%s: not supported cmd = %d\n", obd->obd_name, cmd);
 		rc = -ENOTTY;
@@ -1174,6 +1373,44 @@ static int ofd_quotactl(struct obd_device *obd, struct obd_export *exp,
 	RETURN(rc);
 }
 
+static int ofd_snapshot_lock(const struct lu_env *env,
+			     struct obd_export *exp,
+			     struct obdo *oa,
+			     bool create, void **lock)
+{
+	struct ofd_device	*ofd = ofd_exp(exp);
+	struct lu_fid		*fid = &oa->o_oi.oi_fid;
+	struct ofd_object	*fo;
+	int rc = 0;
+	ENTRY;
+
+	fo = ofd_object_find_exists(env, ofd, fid);
+	if (IS_ERR(fo))
+		RETURN(PTR_ERR(fo));
+
+	rc = dt_snapshot_lock(ofd_object_child(fo), create, lock);
+	ofd_object_put(env, fo);
+	RETURN(rc);
+}
+
+static int ofd_snapshot_unlock(const struct lu_env *env,
+			       struct obd_export *exp,
+			       struct obdo *oa, void *lock)
+{
+	struct ofd_device	*ofd = ofd_exp(exp);
+	struct lu_fid		*fid = &oa->o_oi.oi_fid;
+	struct ofd_object	*fo;
+	ENTRY;
+
+	fo = ofd_object_find_exists(env, ofd, fid);
+	if (IS_ERR(fo))
+		RETURN(PTR_ERR(fo));
+
+	dt_snapshot_unlock(ofd_object_child(fo), lock);
+	ofd_object_put(env, fo);
+	RETURN(0);
+}
+
 struct obd_ops ofd_obd_ops = {
 	.o_owner		= THIS_MODULE,
 	.o_connect		= ofd_obd_connect,
@@ -1196,4 +1433,6 @@ struct obd_ops ofd_obd_ops = {
 	.o_quotactl		= ofd_quotactl,
 	.o_set_info_async	= ofd_set_info_async,
 	.o_get_info		= ofd_get_info,
+	.o_snapshot_lock	= ofd_snapshot_lock,
+	.o_snapshot_unlock	= ofd_snapshot_unlock,
 };
diff --git a/lustre/ofd/ofd_objects.c b/lustre/ofd/ofd_objects.c
index c60e18f..c4b61c2 100644
--- a/lustre/ofd/ofd_objects.c
+++ b/lustre/ofd/ofd_objects.c
@@ -44,7 +44,7 @@
 #include <dt_object.h>
 #include <lustre/lustre_idl.h>
 #include <lustre_lfsck.h>
-
+#include <lustre_snapshot.h>
 #include "ofd_internal.h"
 
 int ofd_version_get_check(struct ofd_thread_info *info,
@@ -491,6 +491,7 @@ int ofd_object_punch(const struct lu_env *env, struct ofd_object *fo,
 	struct thandle		*th;
 	int			 ff_needed = 0;
 	int			 rc;
+	void *lock = NULL;
 
 	ENTRY;
 
@@ -502,6 +503,15 @@ int ofd_object_punch(const struct lu_env *env, struct ofd_object *fo,
 		fmd->fmd_mactime_xid = info->fti_xid;
 	ofd_fmd_put(info->fti_exp, fmd);
 
+	/* if target is not snapshot_orig,
+	 * dt_snapshot_lock() doesn't hold snapshot_lock
+	 * and lock is NULL */
+	rc = dt_snapshot_lock(dob, false, &lock);
+	if (rc) {
+		CERROR("fail to lock snapshot. err=%d\n",
+		       rc);
+		return rc;
+	}
 	ofd_write_lock(env, fo);
 	if (!ofd_object_exists(fo))
 		GOTO(unlock, rc = -ENOENT);
@@ -585,7 +595,8 @@ stop:
 	ofd_trans_stop(env, ofd, th, rc);
 unlock:
 	ofd_write_unlock(env, fo);
-
+	if (lock)
+		dt_snapshot_unlock(dob, lock);
 	return rc;
 }
 
diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c
index 38b0d99..7b9fd78 100644
--- a/lustre/osc/osc_request.c
+++ b/lustre/osc/osc_request.c
@@ -3267,6 +3267,23 @@ static int osc_process_config(struct obd_device *obd, obd_count len, void *buf)
         return osc_process_config_base(obd, buf);
 }
 
+static int osc_snapshot_cancel_lock(struct obd_export *exp, void *val)
+{
+	struct obdo oa;
+	struct list_head cancels = LIST_HEAD_INIT(cancels);
+	struct ost_id *id = val;
+	int count;
+	ENTRY;
+
+	memcpy(&oa.o_oi, id, sizeof(struct ost_id));
+	/* Specify LCK_PW to cancel LCK_PR */
+	count = osc_resource_get_unused(exp, &oa, &cancels, LCK_PW,
+					LDLM_FL_DISCARD_DATA);
+	if (count == 0)
+		RETURN(0);
+	RETURN(ldlm_cli_cancel_list(&cancels, count, NULL, 0));
+}
+
 struct obd_ops osc_obd_ops = {
         .o_owner                = THIS_MODULE,
         .o_setup                = osc_setup,
@@ -3295,6 +3312,7 @@ struct obd_ops osc_obd_ops = {
         .o_process_config       = osc_process_config,
         .o_quotactl             = osc_quotactl,
         .o_quotacheck           = osc_quotacheck,
+	.o_snapshot_cancel_lock = osc_snapshot_cancel_lock,
 };
 
 extern struct lu_kmem_descr osc_caches[];
diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c
index 0c5a07d..9a86be9 100644
--- a/lustre/osd-ldiskfs/osd_handler.c
+++ b/lustre/osd-ldiskfs/osd_handler.c
@@ -72,7 +72,8 @@
 #include <md_object.h>
 #include <lustre_quota.h>
 
-#include <ldiskfs/xattr.h>
+/* snapshot lock */
+#include <ldiskfs/snapshot.h>
 
 int ldiskfs_pdo = 1;
 CFS_MODULE_PARM(ldiskfs_pdo, "i", int, 0644,
@@ -1430,6 +1431,68 @@ static int osd_init_capa_ctxt(const struct lu_env *env, struct dt_device *d,
         RETURN(0);
 }
 
+/*
+ *  osd_snapshot_get_enable
+ *
+ *  snapshot get enable status
+ *
+ *  \param[in]	env		lustre environment
+ *  \param[in]	dt		osd dt device
+ *
+ *  \retval	0		snapshot is disabled
+ *  \retval	1		snapshot is enabled
+ */
+static int osd_snapshot_get_enable(const struct lu_env *env,
+				   struct dt_device *dt)
+{
+	struct osd_device *osd = osd_dt_dev(dt);
+	struct super_block *sb = osd_sb(osd);
+	int rc;
+	ENTRY;
+
+	/* check if snapshot is enabled */
+	rc = ldiskfs_snapshot_get_enable(sb);
+
+	RETURN(rc);
+}
+
+/*
+ *  osd_snapshot_set_enable
+ *
+ *  snapshot set enable status
+ *
+ *  \param[in]	env		lustre environment
+ *  \param[in]	dt		osd dt device
+ *
+ *  \retval	0		success
+ *  \retval	less than 0	failure (-errno)
+ */
+static int osd_snapshot_set_enable(const struct lu_env *env,
+				   struct dt_device *dt)
+{
+	struct osd_device *osd = osd_dt_dev(dt);
+	struct super_block *sb = osd_sb(osd);
+	int rc;
+	ENTRY;
+
+	/* enable snapshot */
+	rc = ldiskfs_snapshot_set_enable(sb);
+
+	RETURN(rc);
+}
+
+static int osd_snapshot_list_orphan(struct dt_device *dt,
+					void *buf)
+{
+	struct osd_device *osd = osd_dt_dev(dt);
+	struct super_block *sb = osd_sb(osd);
+	int rc;
+	ENTRY;
+
+	rc = ldiskfs_snapshot_list_orphan(sb, buf);
+	RETURN(rc);
+}
+
 /**
  * Note: we do not count into QUOTA here.
  * If we mount with --data_journal we may need more.
@@ -1500,6 +1563,9 @@ static const struct dt_device_operations osd_dt_ops = {
         .dt_ro             = osd_ro,
         .dt_commit_async   = osd_commit_async,
         .dt_init_capa_ctxt = osd_init_capa_ctxt,
+	.dt_snapshot_get_enable = osd_snapshot_get_enable,
+	.dt_snapshot_set_enable = osd_snapshot_set_enable,
+	.dt_snapshot_list_orphan = osd_snapshot_list_orphan,
 };
 
 static void osd_object_read_lock(const struct lu_env *env,
@@ -3389,6 +3455,202 @@ static int osd_otable_it_attr_get(const struct lu_env *env,
 	return 0;
 }
 
+static int osd_declare_snapshot_clone(const struct lu_env *env,
+				      struct dt_object *snap_dt,
+				      struct dt_object *orig_dt,
+				      struct thandle *handle,
+				      int ignore_flag)
+{
+	struct osd_object	*snap_obj = osd_dt_obj(snap_dt);
+	struct osd_object	*orig_obj = osd_dt_obj(orig_dt);
+	struct inode		*snap_inode = snap_obj->oo_inode;
+	struct inode		*orig_inode = orig_obj->oo_inode;
+	struct osd_thandle      *oh;
+	int			rc;
+	int                     ignore_quota =
+		(ignore_flag & OBD_BRW_NOQUOTA) ? 1 : 0;
+	/*
+	 * Computed after the existence checks below; do not touch
+	 * snap_inode->i_sb here, before the objects are known to exist
+	 * (presumably oo_inode is unset for a missing object - confirm).
+	 */
+	int                     quota_space_blocks;
+	long long               quota_space;
+
+	ENTRY;
+
+	LASSERT(handle != NULL);
+	oh = container_of0(handle, struct osd_thandle, ot_super);
+	LASSERT(oh->ot_handle == NULL);
+
+	if (!dt_object_exists(snap_dt))
+		RETURN(-ENOENT);
+	if (!dt_object_exists(orig_dt))
+		RETURN(-ENOENT);
+
+	quota_space_blocks = (sizeof(struct ldiskfs_snapshot_link) +
+			      LDISKFS_BLOCK_SIZE(snap_inode->i_sb) - 1)
+			     >> LDISKFS_BLOCK_SIZE_BITS(snap_inode->i_sb);
+	quota_space = toqb(quota_space_blocks
+			   << LDISKFS_BLOCK_SIZE_BITS(snap_inode->i_sb));
+
+	/* for ldiskfs_snapshot_set_enable() */
+	if (!ldiskfs_snapshot_get_enable(snap_inode->i_sb))
+		oh->ot_credits++;
+
+	oh->ot_credits += ldiskfs_calc_snapshot_link_credits(snap_inode,
+							    SNAPSHOT_CREATE_OP);
+
+	if (!LDISKFS_TEST_OST_SNAPSHOT(orig_inode)) {
+		rc = osd_declare_inode_qid(env, i_uid_read(orig_inode),
+					   i_gid_read(orig_inode),
+					   quota_space, oh,
+					   orig_obj, true, NULL,
+					   ignore_quota);
+		if (rc) {
+			CERROR("fail to orig osd_declare_inode_qid. rc=%d\n",
+			       rc);
+			RETURN(rc);
+		}
+	}
+	rc = osd_declare_inode_qid(env, i_uid_read(snap_inode),
+				   i_gid_read(snap_inode),
+				   quota_space, oh,
+				   snap_obj, true, NULL,
+				   ignore_quota);
+	if (rc)
+		CERROR("fail to snapshot osd_declare_inode_qid. rc=%d\n",
+		       rc);
+	RETURN(rc);
+}
+
+/*
+ * osd_snapshot_clone()
+ *
+ * create snapshot at osd layer
+ *
+ * \param[in]	snap_dt		snapshot osd object
+ * \param[in]	orig_dt		original osd object
+ *
+ * \retval	0		success
+ * \retval	less than 0	failure (error code -errno)
+ */
+static int osd_snapshot_clone(struct dt_object *snap_dt,
+			      struct dt_object *orig_dt)
+{
+	struct osd_object	*snap_obj = osd_dt_obj(snap_dt);
+	struct osd_object	*orig_obj = osd_dt_obj(orig_dt);
+	struct osd_device	*osd = osd_dev(snap_dt->do_lu.lo_dev);
+	struct super_block	*sb = osd_sb(osd);
+	struct inode		*snap_inode = snap_obj->oo_inode;
+	struct inode		*orig_inode = orig_obj->oo_inode;
+	int			rc;
+	ENTRY;
+
+	LASSERT(snap_inode);
+	LASSERT(orig_inode);
+
+	/* check if snapshot is enabled */
+	rc = ldiskfs_snapshot_get_enable(sb);
+	if (rc != 1) {
+		rc = ldiskfs_snapshot_set_enable(sb);
+		if (rc)
+			RETURN(rc);
+	}
+
+	/* clone inode extents */
+	rc = ldiskfs_snapshot_clone(snap_inode, orig_inode);
+
+	RETURN(rc);
+}
+
+static int osd_snapshot_get_old_list(struct dt_object *snap_dt,
+				     void *list_buf)
+{
+	ENTRY;
+
+	if (!dt_object_exists(snap_dt))
+		RETURN(-ENOENT);
+
+	RETURN(ldiskfs_snapshot_get_old_list(osd_dt_obj(snap_dt)->oo_inode,
+					     list_buf));
+}
+
+/*
+ * osd_snapshot_lock()
+ *
+ * snapshot lock for all generations
+ *
+ * \param[in]	dt		osd object
+ * \param[in]	create		for create snapshot
+ * \param[out]	lock		snapshot lock object
+ *
+ * \retval	0		success
+ * \retval	less than 0	failure (error code -errno)
+ */
+static int osd_snapshot_lock(struct dt_object *dt, bool create,
+			     void **lock)
+{
+	struct osd_object	*osd_obj = osd_dt_obj(dt);
+	struct inode		*inode = osd_obj->oo_inode;
+	int			rc;
+	ENTRY;
+
+	if (!dt_object_exists(dt))
+		RETURN(-ENOENT);
+
+	/* lock snapshot at ldiskfs layer */
+	rc = ldiskfs_snapshot_lock(inode, create,
+			(struct ldiskfs_snapshot_gen_lock **)lock);
+
+	RETURN(rc);
+}
+
+/*
+ * osd_snapshot_unlock()
+ *
+ * snapshot unlock for all generations
+ *
+ * \param[in]	lock		snapshot lock object
+ *
+ * \retval	none
+ */
+static void osd_snapshot_unlock(void *lock)
+{
+	ENTRY;
+
+	/* unlock snapshot at ldiskfs layer */
+	ldiskfs_snapshot_unlock((struct ldiskfs_snapshot_gen_lock *)lock);
+
+	EXIT;
+}
+
+static int osd_snapshot_get_orphan(struct dt_object *dt,
+				   void *fid_buf,
+				   int *array_num)
+{
+	struct osd_object	*osd_obj = osd_dt_obj(dt);
+	struct inode		*inode = osd_obj->oo_inode;
+	ENTRY;
+
+	LASSERT(inode);
+
+	RETURN(ldiskfs_snapshot_get_orphan(inode,
+					   fid_buf,
+					   array_num));
+}
+
+static int osd_snapshot_destroy(struct dt_object *dt, void *orig_fid)
+{
+	struct osd_object	*osd_obj = osd_dt_obj(dt);
+	struct inode		*inode = osd_obj->oo_inode;
+	ENTRY;
+
+	LASSERT(inode);
+
+	RETURN(ldiskfs_snapshot_destroy(inode, orig_fid));
+}
+
 static const struct dt_object_operations osd_obj_ops = {
         .do_read_lock         = osd_object_read_lock,
         .do_write_lock        = osd_object_write_lock,
@@ -3417,6 +3679,13 @@ static const struct dt_object_operations osd_obj_ops = {
         .do_capa_get          = osd_capa_get,
         .do_object_sync       = osd_object_sync,
         .do_data_get          = osd_data_get,
+	.do_osd_snapshot_clone = osd_snapshot_clone,
+	.do_osd_declare_snapshot_clone = osd_declare_snapshot_clone,
+	.do_osd_snapshot_get_old_list = osd_snapshot_get_old_list,
+	.do_osd_snapshot_lock = osd_snapshot_lock,
+	.do_osd_snapshot_unlock = osd_snapshot_unlock,
+	.do_osd_snapshot_destroy = osd_snapshot_destroy,
+	.do_osd_snapshot_get_orphan = osd_snapshot_get_orphan,
 };
 
 /**
@@ -3451,6 +3720,13 @@ static const struct dt_object_operations osd_obj_ea_ops = {
         .do_capa_get          = osd_capa_get,
         .do_object_sync       = osd_object_sync,
         .do_data_get          = osd_data_get,
+	.do_osd_snapshot_clone = osd_snapshot_clone,
+	.do_osd_declare_snapshot_clone = osd_declare_snapshot_clone,
+	.do_osd_snapshot_get_old_list = osd_snapshot_get_old_list,
+	.do_osd_snapshot_lock = osd_snapshot_lock,
+	.do_osd_snapshot_unlock = osd_snapshot_unlock,
+	.do_osd_snapshot_destroy = osd_snapshot_destroy,
+	.do_osd_snapshot_get_orphan = osd_snapshot_get_orphan,
 };
 
 static const struct dt_object_operations osd_obj_otable_it_ops = {
diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c
index fdb7915..d4f0f5f 100644
--- a/lustre/osd-ldiskfs/osd_io.c
+++ b/lustre/osd-ldiskfs/osd_io.c
@@ -60,6 +60,9 @@
 /* ext_depth() */
 #include <ldiskfs/ldiskfs_extents.h>
 
+/* for snapshot lock */
+#include <ldiskfs/snapshot.h>
+
 static int __osd_init_iobuf(struct osd_device *d, struct osd_iobuf *iobuf,
 			    int rw, int line, int pages)
 {
@@ -502,14 +505,6 @@ static int osd_bufs_put(const struct lu_env *env, struct dt_object *dt,
 #define ldiskfs_ext_pblock(ex) ext_pblock((ex))
 #endif
 
-struct bpointers {
-	unsigned long *blocks;
-	unsigned long start;
-	int num;
-	int init_num;
-	int create;
-};
-
 static long ldiskfs_ext_find_goal(struct inode *inode,
 				  struct ldiskfs_ext_path *path,
 				  unsigned long block, int *aflags)
@@ -597,7 +592,7 @@ static int ldiskfs_ext_new_extent_cb(struct inode *inode,
 		goto map;
 	}
 
-	if (bp->create == 0) {
+	if (bp->create != WRITE_COMMIT_OP) {
 		i = 0;
 		if (cex->ec_block < bp->start)
 			i = bp->start - cex->ec_block;
@@ -832,6 +827,8 @@ static int osd_ldiskfs_map_inode_pages(struct inode *inode, struct page **page,
 
 	return rc;
 }
+static int osd_read_prep(const struct lu_env *env, struct dt_object *dt,
+			 struct niobuf_local *lnb, int npages);
 
 static int osd_write_prep(const struct lu_env *env, struct dt_object *dt,
                           struct niobuf_local *lnb, int npages)
@@ -851,6 +848,21 @@ static int osd_write_prep(const struct lu_env *env, struct dt_object *dt,
 
         LASSERT(inode);
 
+	if (npages && LDISKFS_TEST_OST_SNAPSHOT_ORIG(inode)) {
+		rc = osd_init_iobuf(osd, iobuf, 0, npages);
+		if (unlikely(rc != 0))
+			RETURN(rc);
+		for (i = 0; i < npages; i++)
+			osd_iobuf_add_page(iobuf, lnb[i].page);
+
+		rc = osd_ldiskfs_map_inode_pages(inode, iobuf->dr_pages,
+						 npages,
+						 iobuf->dr_blocks,
+						 WRITE_COPY_OP);
+		if (rc)
+			RETURN(rc);
+	}
+
 	rc = osd_init_iobuf(osd, iobuf, 0, npages);
 	if (unlikely(rc != 0))
 		RETURN(rc);
@@ -903,7 +915,8 @@ static int osd_write_prep(const struct lu_env *env, struct dt_object *dt,
         if (iobuf->dr_npages) {
 		rc = osd_ldiskfs_map_inode_pages(inode, iobuf->dr_pages,
 						 iobuf->dr_npages,
-						 iobuf->dr_blocks, 0);
+						 iobuf->dr_blocks,
+						 WRITE_PREP_OP);
                 if (likely(rc == 0)) {
                         rc = osd_do_bio(osd, inode, iobuf);
                         /* do IO stats for preparation reads */
@@ -1106,7 +1119,8 @@ static int osd_write_commit(const struct lu_env *env, struct dt_object *dt,
         } else if (iobuf->dr_npages > 0) {
 		rc = osd_ldiskfs_map_inode_pages(inode, iobuf->dr_pages,
 						 iobuf->dr_npages,
-						 iobuf->dr_blocks, 1);
+						 iobuf->dr_blocks,
+						 WRITE_COMMIT_OP);
         } else {
                 /* no pages to write, no transno is needed */
                 thandle->th_local = 1;
@@ -1201,10 +1215,13 @@ static int osd_read_prep(const struct lu_env *env, struct dt_object *dt,
 				    cache_hits + cache_misses);
 
         if (iobuf->dr_npages) {
-		rc = osd_ldiskfs_map_inode_pages(inode, iobuf->dr_pages,
-						 iobuf->dr_npages,
-						 iobuf->dr_blocks, 0);
-                rc = osd_do_bio(osd, inode, iobuf);
+		rc = osd_ldiskfs_map_inode_pages(
+				inode, iobuf->dr_pages, iobuf->dr_npages,
+				iobuf->dr_blocks, READ_OP);
+		if (likely(rc == 0))
+			rc = osd_do_bio(osd, inode, iobuf);
+		else
+			osd_fini_iobuf(osd, iobuf);
 
                 /* IO stats will be done in osd_bufs_put() */
         }
@@ -1644,8 +1661,19 @@ static int osd_punch(const struct lu_env *env, struct dt_object *dt,
 
 	tid = oh->ot_handle->h_transaction->t_tid;
 
+	if (LDISKFS_TEST_OST_SNAPSHOT_ORIG(inode)) {
+		rc = ldiskfs_snapshot_punch(ldiskfs_journal_current_handle(),
+					    inode, start, end);
+		if (rc) {
+			CERROR("fail to snapshot punch inode=%lu\n",
+			       inode->i_ino);
+			SNAPSHOT_CONSOLE_ERR(rc);
+			RETURN(rc);
+		}
+	}
 	i_size_write(inode, start);
 	ll_truncate_pagecache(inode, start);
+
 #ifdef HAVE_INODEOPS_TRUNCATE
 	if (inode->i_op->truncate) {
 		inode->i_op->truncate(inode);
diff --git a/lustre/osd-ldiskfs/osd_lproc.c b/lustre/osd-ldiskfs/osd_lproc.c
index 98e857a..690a7bc 100644
--- a/lustre/osd-ldiskfs/osd_lproc.c
+++ b/lustre/osd-ldiskfs/osd_lproc.c
@@ -45,6 +45,8 @@
 
 #include "osd_internal.h"
 
+#include <ldiskfs/snapshot.h>
+
 #ifdef LPROCFS
 
 void osd_brw_stats_update(struct osd_device *osd, struct osd_iobuf *iobuf)
@@ -403,6 +405,30 @@ ldiskfs_osd_auto_scrub_seq_write(struct file *file, const char *buffer,
 LPROC_SEQ_FOPS(ldiskfs_osd_auto_scrub);
 
 static int
+ldiskfs_osd_snapshot_lock_timeout_seq_show(struct seq_file *m, void *data)
+{
+	return seq_printf(m, "%d\n", ldiskfs_get_snapshot_lock_timeout());
+}
+
+static ssize_t
+ldiskfs_osd_snapshot_lock_timeout_seq_write(struct file *file,
+						const char *buffer,
+						size_t count, loff_t *off)
+{
+	int	timeout;
+	int	err = lprocfs_write_helper(buffer, count, &timeout);
+
+	/* only install the new timeout once the input parsed cleanly */
+	if (err == 0) {
+		ldiskfs_set_snapshot_lock_timeout(timeout);
+		return count;
+	}
+
+	return err;
+}
+LPROC_SEQ_FOPS(ldiskfs_osd_snapshot_lock_timeout);
+
+static int
 ldiskfs_osd_track_declares_assert_seq_show(struct seq_file *m, void *data)
 {
 	return seq_printf(m, "%d\n", ldiskfs_track_declares_assert);
@@ -553,6 +579,8 @@ struct lprocfs_seq_vars lprocfs_osd_obd_vars[] = {
 struct lprocfs_seq_vars lprocfs_osd_module_vars[] = {
 	{ .name	=	"track_declares_assert",
 	  .fops	=	&ldiskfs_osd_track_declares_assert_fops		},
+	{ .name	=	"snapshot_lock_timeout",
+	  .fops	=	&ldiskfs_osd_snapshot_lock_timeout_fops		},
 	{ 0 }
 };
 
diff --git a/lustre/osp/osp_object.c b/lustre/osp/osp_object.c
index 690e1ce..f993b25 100644
--- a/lustre/osp/osp_object.c
+++ b/lustre/osp/osp_object.c
@@ -1513,6 +1513,83 @@ static int osp_index_try(const struct lu_env *env,
 	return 0;
 }
 
+/*
+ *  osp_snapshot_clone() - send an OST_SNAPSHOT request and wait for it
+ *
+ *  \param[in]	env		environment (source of the caller's lu_ucred)
+ *  \param[in]	snap_dt		snapshot object, its FID is packed as o_oi
+ *  \param[in]	attr		snapshot attributes (LA_FLAGS is not packed)
+ *  \param[in]	orig_fid	original fid, sent in o_snapshot_orig_*
+ *  \param[in]	mdt_fid		snapshot fid on mdt, packed as parent fid
+ *
+ *  \retval	0		success
+ *  \retval	not 0		errcode
+ */
+static int osp_snapshot_clone(const struct lu_env *env,
+			      struct dt_object *snap_dt,
+			      struct lu_attr *attr,
+			      const struct lu_fid *orig_fid,
+			      const struct lu_fid *mdt_fid)
+{
+	struct osp_device	*d = lu2osp_dev(snap_dt->do_lu.lo_dev);
+	struct obd_import	*imp = d->opd_obd->u.cli.cl_import;
+	struct ptlrpc_request	*req = NULL;
+	struct ost_body		*body;
+	struct lu_ucred         *uc  =  lu_ucred(env);
+	int			rc;
+	ENTRY;
+
+	/* allocate an OST_SNAPSHOT request on the import of this device */
+	req = ptlrpc_request_alloc(imp, &RQF_OST_SNAPSHOT);
+	if (req == NULL)
+		GOTO(out, rc = -ENOMEM);
+
+	/* pack req; on failure it is freed here and req is reset to NULL */
+	rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SNAPSHOT);
+	if (rc) {
+		ptlrpc_request_free(req);
+		req = NULL;
+		GOTO(out, rc);
+	}
+	req->rq_request_portal = OST_IO_PORTAL;
+
+	body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
+	LASSERT(body);
+	memset(body, 0x0, sizeof(*body));
+
+	fid_to_ostid(lu_object_fid(&snap_dt->do_lu), &body->oa.o_oi);
+	body->oa.o_valid |= (OBD_MD_FLID | OBD_MD_FLGROUP);
+
+	/* inode flags from the MDS are not sent to the OST: mask LA_FLAGS */
+	obdo_from_la(&body->oa, attr, attr->la_valid & ~LA_FLAGS);
+	obdo_set_parent_fid(&body->oa, mdt_fid);
+
+	body->oa.o_stripe_idx = d->opd_index;
+
+	body->oa.o_snapshot_orig_seq = orig_fid->f_seq;
+	body->oa.o_snapshot_orig_oid = orig_fid->f_oid;
+	body->oa.o_snapshot_orig_ver = orig_fid->f_ver;
+
+	body->oa.o_flags = (uc && (uc->uc_cap & CFS_CAP_SYS_RESOURCE_MASK))
+		? OBD_BRW_NOQUOTA : 0;
+	ptlrpc_request_set_replen(req);
+	ptlrpc_at_set_req_timeout(req);
+
+	/* send req; rc is the result of ptlrpc_queue_wait() */
+	rc = ptlrpc_queue_wait(req);
+out:
+	if (rc)
+		CERROR("failed to create snapshot "
+		       "FID="DFID" ost_idx=%d, ost_fid="DFID" rc=%d\n",
+		       PFID(mdt_fid), d->opd_index,
+		       PFID(lu_object_fid(&snap_dt->do_lu)),
+		       rc);
+
+	if (req)
+		ptlrpc_req_finished(req);
+	RETURN(rc);
+}
+
 struct dt_object_operations osp_obj_ops = {
 	.do_declare_attr_get	= osp_declare_attr_get,
 	.do_attr_get		= osp_attr_get,
@@ -1527,6 +1604,7 @@ struct dt_object_operations osp_obj_ops = {
 	.do_declare_destroy	= osp_declare_object_destroy,
 	.do_destroy		= osp_object_destroy,
 	.do_index_try		= osp_index_try,
+	.do_osp_snapshot_clone	= osp_snapshot_clone,
 };
 
 static int osp_object_init(const struct lu_env *env, struct lu_object *o,
diff --git a/lustre/ptlrpc/layout.c b/lustre/ptlrpc/layout.c
index 4ecd9b5..b9f1db9 100644
--- a/lustre/ptlrpc/layout.c
+++ b/lustre/ptlrpc/layout.c
@@ -312,6 +312,23 @@ static const struct req_msg_field *mds_reint_setxattr_client[] = {
 	&RMF_DLM_REQ
 };
 
+static const struct req_msg_field *mds_reint_snapshot_client[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_REC_REINT,
+	&RMF_NAME,
+	&RMF_SNAP_EANAME,
+	&RMF_EADATA,
+	&RMF_EADATA2,
+	&RMF_DLM_REQ
+};
+
+static const struct req_msg_field *mds_reint_snapshot_server[] = {
+	&RMF_PTLRPC_BODY,
+	&RMF_MDT_BODY,
+	&RMF_CAPA1,
+	&RMF_MDT_MD
+};
+
 static const struct req_msg_field *mdt_swap_layouts[] = {
 	&RMF_PTLRPC_BODY,
 	&RMF_MDT_BODY,
@@ -799,6 +816,8 @@ static struct req_format *req_formats[] = {
 	&RQF_CONNECT,
 	&RQF_LFSCK_NOTIFY,
 	&RQF_LFSCK_QUERY,
+	&RQF_MDS_REINT_SNAPSHOT,
+	&RQF_OST_SNAPSHOT,
 };
 
 struct req_msg_field {
@@ -1063,6 +1082,14 @@ struct req_msg_field RMF_EADATA = DEFINE_MSGF("eadata", 0, -1,
                                                     NULL, NULL);
 EXPORT_SYMBOL(RMF_EADATA);
 
+struct req_msg_field RMF_EADATA2 = DEFINE_MSGF("eadata", 0, -1,
+					       NULL, NULL);
+EXPORT_SYMBOL(RMF_EADATA2);
+
+struct req_msg_field RMF_SNAP_EANAME =
+	DEFINE_MSGF("snap_eaname", RMF_F_STRING, -1, NULL, NULL);
+EXPORT_SYMBOL(RMF_SNAP_EANAME);
+
 struct req_msg_field RMF_EAVALS = DEFINE_MSGF("eavals", 0, -1, NULL, NULL);
 EXPORT_SYMBOL(RMF_EAVALS);
 
@@ -1676,6 +1703,16 @@ struct req_format RQF_LFSCK_QUERY =
 	DEFINE_REQ_FMT0("LFSCK_QUERY", obd_lfsck_request, obd_lfsck_reply);
 EXPORT_SYMBOL(RQF_LFSCK_QUERY);
 
+struct req_format RQF_MDS_REINT_SNAPSHOT =
+	DEFINE_REQ_FMT0("MDS_REINT_SNAPSHOT", mds_reint_snapshot_client,
+					      mds_reint_snapshot_server);
+EXPORT_SYMBOL(RQF_MDS_REINT_SNAPSHOT);
+
+struct req_format RQF_OST_SNAPSHOT =
+	DEFINE_REQ_FMT0("OST_SNAPSHOT", ost_body_only,
+					ost_body_only);
+EXPORT_SYMBOL(RQF_OST_SNAPSHOT);
+
 #if !defined(__REQ_LAYOUT_USER__)
 
 /* Convenience macro */
diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c
index 84b0460..8b644af 100644
--- a/lustre/ptlrpc/lproc_ptlrpc.c
+++ b/lustre/ptlrpc/lproc_ptlrpc.c
@@ -73,6 +73,18 @@ struct ll_rpc_opcode {
         { OST_QUOTACHECK,   "ost_quotacheck" },
         { OST_QUOTACTL,     "ost_quotactl" },
         { OST_QUOTA_ADJUST_QUNIT, "ost_quota_adjust_qunit" },
+	{ 21,                NULL },
+	{ 22,                NULL },
+	{ 23,                NULL },
+	{ 24,                NULL },
+	{ 25,                NULL },
+	{ 26,                NULL },
+	{ 27,                NULL },
+	{ 28,                NULL },
+	{ 29,                NULL },
+	{ 30,                NULL },
+	{ 31,                NULL },
+	{ OST_SNAPSHOT,     "ost_snapshot" },
         { MDS_GETATTR,      "mds_getattr" },
         { MDS_GETATTR_NAME, "mds_getattr_lock" },
         { MDS_CLOSE,        "mds_close" },
diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c
index 7768e41..4c7fa52 100644
--- a/lustre/ptlrpc/pack_generic.c
+++ b/lustre/ptlrpc/pack_generic.c
@@ -2092,6 +2092,10 @@ void lustre_swab_mdt_rec_reint (struct mdt_rec_reint *rr)
 	__swab32s(&rr->rr_flags);
 	__swab32s(&rr->rr_flags_h);
 	__swab32s(&rr->rr_umask);
+	__swab32s(&rr->rr_padding_4); /* SEE FOLLOWINGS */
+	/* rr_padding_4 is used for mdt_rec_snapshot_create::sc_flags;
+	 * this swab function handles both mdt_rec_reint and
+	 * mdt_rec_snapshot_create */
 
 	CLASSERT(offsetof(typeof(*rr), rr_padding_4) != 0);
 };
diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c
index 62ddffa..53b7dc8 100644
--- a/lustre/ptlrpc/wiretest.c
+++ b/lustre/ptlrpc/wiretest.c
@@ -110,7 +110,9 @@ void lustre_assert_wire_constants(void)
 		 (long long)OST_QUOTACTL);
 	LASSERTF(OST_QUOTA_ADJUST_QUNIT == 20, "found %lld\n",
 		 (long long)OST_QUOTA_ADJUST_QUNIT);
-	LASSERTF(OST_LAST_OPC == 21, "found %lld\n",
+	LASSERTF(OST_SNAPSHOT == 32, "found %lld\n",
+		 (long long)OST_SNAPSHOT);
+	LASSERTF(OST_LAST_OPC == 33, "found %lld\n",
 		 (long long)OST_LAST_OPC);
 	LASSERTF(OBD_OBJECT_EOF == 0xffffffffffffffffULL, "found 0x%.16llxULL\n",
 		 OBD_OBJECT_EOF);
@@ -202,7 +204,9 @@ void lustre_assert_wire_constants(void)
 		 (long long)REINT_RMENTRY);
 	LASSERTF(REINT_MIGRATE == 9, "found %lld\n",
 		 (long long)REINT_MIGRATE);
-	LASSERTF(REINT_MAX == 10, "found %lld\n",
+	LASSERTF(REINT_SNAPSHOT == 21, "found %lld\n",
+		 (long long)REINT_SNAPSHOT);
+	LASSERTF(REINT_MAX == 22, "found %lld\n",
 		 (long long)REINT_MAX);
 	LASSERTF(DISP_IT_EXECD == 0x00000001UL, "found 0x%.8xUL\n",
 		(unsigned)DISP_IT_EXECD);
diff --git a/lustre/target/tgt_handler.c b/lustre/target/tgt_handler.c
index af9cb58..6fdf048 100644
--- a/lustre/target/tgt_handler.c
+++ b/lustre/target/tgt_handler.c
@@ -471,6 +471,7 @@ static int tgt_filter_recovery_request(struct ptlrpc_request *req,
 	case OST_SETATTR:
 	case OST_SYNC:
 	case OST_WRITE:
+	case OST_SNAPSHOT:
 		*process = target_queue_recovery_request(req, obd);
 		RETURN(0);
 
@@ -1650,12 +1651,13 @@ int tgt_brw_read(struct tgt_session_info *tsi)
 	struct niobuf_remote	*remote_nb;
 	struct niobuf_local	*local_nb;
 	struct obd_ioobj	*ioo;
-	struct ost_body		*body, *repbody;
+	struct ost_body		*body, *repbody = NULL;
 	struct l_wait_info	 lwi;
 	struct lustre_handle	 lockh = { 0 };
 	int			 niocount, npages, nob = 0, rc, i;
 	int			 no_reply = 0;
 	struct tgt_thread_big_cache *tbc = req->rq_svc_thread->t_data;
+	void                    *snap_lock = NULL;
 
 	ENTRY;
 
@@ -1726,11 +1728,22 @@ int tgt_brw_read(struct tgt_session_info *tsi)
 	repbody = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
 	repbody->oa = body->oa;
 
+	/* if the target is not a snapshot_orig object,
+	 * obd_snapshot_lock() does not take the snapshot lock
+	 * and snap_lock stays NULL */
+	rc = obd_snapshot_lock(tsi->tsi_env, exp,
+			&repbody->oa, false, &snap_lock);
+	if (rc) {
+		CERROR("fail to lock snapshot. err=%d\n",
+			rc);
+		GOTO(out_lock, rc);
+	}
+
 	npages = PTLRPC_MAX_BRW_PAGES;
 	rc = obd_preprw(tsi->tsi_env, OBD_BRW_READ, exp, &repbody->oa, 1,
 			ioo, remote_nb, &npages, local_nb, NULL, BYPASS_CAPA);
 	if (rc != 0)
-		GOTO(out_lock, rc);
+		GOTO(out_snap_unlock, rc);
 
 	desc = ptlrpc_prep_bulk_exp(req, npages, ioobj_max_brw_get(ioo),
 				    BULK_PUT_SOURCE, OST_BULK_PORTAL);
@@ -1792,6 +1805,10 @@ out_commitrw:
 			  NULL, rc);
 	if (rc == 0)
 		tgt_drop_id(exp, &repbody->oa);
+out_snap_unlock:
+	if (snap_lock)
+		obd_snapshot_unlock(tsi->tsi_env, exp,
+					&repbody->oa, snap_lock);
 out_lock:
 	tgt_brw_unlock(ioo, remote_nb, &lockh, LCK_PR);
 
@@ -1884,15 +1901,16 @@ int tgt_brw_write(struct tgt_session_info *tsi)
 	struct niobuf_remote	*remote_nb;
 	struct niobuf_local	*local_nb;
 	struct obd_ioobj	*ioo;
-	struct ost_body		*body, *repbody;
+	struct ost_body		*body, *repbody = NULL;
 	struct l_wait_info	 lwi;
 	struct lustre_handle	 lockh = {0};
 	__u32			*rcs;
 	int			 objcount, niocount, npages;
 	int			 rc, i, j;
-	cksum_type_t		 cksum_type = OBD_CKSUM_CRC32;
+	cksum_type_t             cksum_type = OBD_CKSUM_CRC32;
 	bool			 no_reply = false, mmap;
 	struct tgt_thread_big_cache *tbc = req->rq_svc_thread->t_data;
+	void			*snap_lock = NULL;
 
 	ENTRY;
 
@@ -1994,6 +2012,16 @@ int tgt_brw_write(struct tgt_session_info *tsi)
 		GOTO(out_lock, rc = -ENOMEM);
 	repbody->oa = body->oa;
 
+	/* if the target is not a snapshot_orig object,
+	 * obd_snapshot_lock() does not take the snapshot lock
+	 * and snap_lock stays NULL */
+	rc = obd_snapshot_lock(tsi->tsi_env, exp,
+				&repbody->oa, false, &snap_lock);
+	if (rc) {
+		CERROR("fail to lock snapshot. err=%d\n", rc);
+		GOTO(out_lock, rc);
+	}
+
 	npages = PTLRPC_MAX_BRW_PAGES;
 	rc = obd_preprw(tsi->tsi_env, OBD_BRW_WRITE, exp, &repbody->oa,
 			objcount, ioo, remote_nb, &npages, local_nb, NULL,
@@ -2090,6 +2118,9 @@ skip_transfer:
 		tgt_drop_id(exp, &repbody->oa);
 	}
 out_lock:
+	if (snap_lock)
+		obd_snapshot_unlock(tsi->tsi_env, exp,
+				    &repbody->oa, snap_lock);
 	tgt_brw_unlock(ioo, remote_nb, &lockh, LCK_PW);
 	if (desc)
 		ptlrpc_free_bulk_nopin(desc);
diff --git a/lustre/utils/Makefile.am b/lustre/utils/Makefile.am
index da0213f..6bf35b3 100644
--- a/lustre/utils/Makefile.am
+++ b/lustre/utils/Makefile.am
@@ -48,7 +48,7 @@ lctl_SOURCES = lustre_lfsck.c obd.c lustre_cfg.c lctl.c obdctl.h
 lctl_LDADD :=  liblustreapi.a $(LIBPTLCTL) $(PTHREAD_LIBS) $(LIBREADLINE)
 lctl_DEPENDENCIES := $(LIBPTLCTL) liblustreapi.a
 
-lfs_SOURCES = lfs.c
+lfs_SOURCES = lfs.c lfs_snapshot.c lfs_snapshot.h
 lfs_LDADD := liblustreapi.a $(LIBPTLCTL) $(LIBREADLINE)
 lfs_DEPENDENCIES := $(LIBPTLCTL) liblustreapi.a
 
diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c
index 261da54..23ccb3d 100644
--- a/lustre/utils/lctl.c
+++ b/lustre/utils/lctl.c
@@ -448,6 +448,17 @@ command_t cmdlist[] = {
 	{"set_route", jt_ptl_notify_router, 0,
 	 "enable/disable routes via gateway in the portals routing table\n"
 	 "usage: set_route <gateway> <up/down> [<time>]"},
+	/* snapshot operations */
+	{"==== snapshot ====", jt_noop, 0, "snapshot"},
+	{"snapshot", jt_snapshot, 0,
+	 "enable snapshot\n"
+	 "usage: snapshot on <fsname>\n"
+	 "       snapshot status <fsname>"},
+	{"snapshot_get_orphan", jt_snapshot_orphan, 0,
+	 "get orphan snapshot object on OST\n"
+	 "usage: snapshot_get_orphan --list <OST name>\n"
+	 "                           --fid <ost fid> <OST name>\n"
+	 "                           --delete [-f] <ost fid> <OST name>"},
 
 	{ 0, 0, 0, NULL }
 };
diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c
index 9c69498..8a466ea 100644
--- a/lustre/utils/lfs.c
+++ b/lustre/utils/lfs.c
@@ -70,6 +70,8 @@
 #include <lustre/lustreapi.h>
 #include <lustre_ver.h>
 
+#include "lfs_snapshot.h"
+
 /* all functions */
 static int lfs_setstripe(int argc, char **argv);
 static int lfs_find(int argc, char **argv);
@@ -114,6 +116,7 @@ static int lfs_hsm_remove(int argc, char **argv);
 static int lfs_hsm_cancel(int argc, char **argv);
 static int lfs_swap_layouts(int argc, char **argv);
 static int lfs_mv(int argc, char **argv);
+static int lfs_snapshot(int argc, char **argv);
 
 #define SETSTRIPE_USAGE(_cmd, _tgt) \
 	"usage: "_cmd" [--stripe-count|-c <stripe_count>]\n"\
@@ -337,6 +340,11 @@ command_t cmdlist[] = {
 	 "To move directories between MDTs.\n"
 	 "usage: mv <directory|filename> [--mdt-index|-M] <mdt_index> "
 	 "[--verbose|-v]\n"},
+	{"snapshot", lfs_snapshot, 0,
+	 "snapshot operations.\n"
+	 "usage: snapshot --create [-s <snapshot>] [-d <directory>]\n"
+	 "       snapshot --delete -s <snapshot> [-d <directory>] [-f] [-I]\n"
+	 "       snapshot --list [-d <directory>] [-R]"},
 	{"help", Parser_help, 0, "help"},
 	{"exit", Parser_quit, 0, "quit"},
 	{"quit", Parser_quit, 0, "quit"},
@@ -3846,6 +3854,36 @@ static int lfs_swap_layouts(int argc, char **argv)
 				  SWAP_LAYOUTS_KEEP_ATIME);
 }
 
+/*
+ *  lfs_snapshot
+ *
+ *  entry point of "lfs snapshot": dispatch on the sub command in argv[1]
+ *
+ *  \param[in]	argc		number of arguments
+ *  \param[in]	argv		array of arguments
+ *
+ *  \retval     0		success
+ *  \retval     not 0		error code (CMD_HELP for a bad sub command)
+ */
+static int lfs_snapshot(int argc, char **argv)
+{
+	char *subcmd;
+
+	if (argc < 2)
+		return CMD_HELP;
+
+	/* handlers receive argv with the sub command as their argv[0] */
+	subcmd = argv[1];
+	if (!strcmp(subcmd, "--create"))
+		return lfs_snapshot_create(argc - 1, &argv[1]);
+	if (!strcmp(subcmd, "--delete"))
+		return lfs_snapshot_delete(argc - 1, &argv[1]);
+	if (!strcmp(subcmd, "--list"))
+		return lfs_snapshot_list(argc - 1, &argv[1]);
+
+	return CMD_HELP;
+}
+
 int main(int argc, char **argv)
 {
         int rc;
diff --git a/lustre/utils/lfs_snapshot.c b/lustre/utils/lfs_snapshot.c
new file mode 100644
index 0000000..7cd4573
--- /dev/null
+++ b/lustre/utils/lfs_snapshot.c
@@ -0,0 +1,1703 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License version 2 for more details.  A copy is
+ * included in the COPYING file that accompanied this code.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ *   Copyright(c) 2016-2017 FUJITSU LIMITED.
+ *   All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <dirent.h>
+#include <grp.h>
+#include <pwd.h>
+#include <sys/statfs.h>
+
+#include <libcfs/libcfs.h>
+#include <libcfs/libcfsutil.h>
+#include <lustre/lustreapi.h>
+#include <lustre_ver.h>
+
+#include "lfs_snapshot.h"
+
+#define SFBITS_MASK (S_ISUID | S_ISGID)
+
+struct create_param {
+	char top_fsuuid[sizeof(struct obd_uuid)];
+};
+
+/*
+ *  snapshot_digit
+ *
+ *  count the decimal digits of a number (a sign is not counted)
+ *
+ *  \param[in]	num		numeric value
+ *
+ *  \retval     1 or more	number of digits
+ */
+static inline int snapshot_digit(int num)
+{
+	int count;
+
+	for (count = 1; num / 10 != 0; count++)
+		num /= 10;
+
+	return count;
+}
+
+/*
+ *  lock_file_create
+ *
+ *  create the lock file exclusively and record host name and pid in it
+ *
+ *  \param[in]	target		path of the lock file to create
+ *
+ *  \retval     0		success
+ *  \retval     not 0		error (-EEXIST if the file already exists)
+ */
+static int lock_file_create(char *target)
+{
+	char name[HOST_NAME_MAX + 1];
+	char data[HOST_NAME_MAX + 1 + 10 + 1]; /* host + '\n' + pid + NUL */
+	ssize_t written;
+	size_t size;
+	int fd;
+	int rc;
+
+	/* get host name */
+	if (gethostname(name, sizeof(name)) != 0)
+		return -errno;
+
+	/*
+	 * POSIX leaves it unspecified whether a truncated host name is
+	 * NUL-terminated, so terminate it before using it as a string.
+	 */
+	name[sizeof(name) - 1] = '\0';
+
+	/* create lock file; O_EXCL fails with EEXIST if it already exists */
+	fd = open(target, O_CREAT | O_EXCL | O_WRONLY,
+						S_IRUSR | S_IRGRP | S_IROTH);
+	if (fd == -1)
+		return -errno;
+
+	/* write information: host name and pid, separated by a newline */
+	snprintf(data, sizeof(data), "%s\n%d", name, (int)getpid());
+	size = strlen(data);
+	written = write(fd, data, size);
+	if (written == -1) {
+		rc = -errno;
+		goto error;
+	}
+	if ((size_t)written != size) {
+		/* a short write would leave an incomplete lock file */
+		rc = -EIO;
+		goto error;
+	}
+
+	/* file close */
+	close(fd);
+
+	return 0;
+
+error:
+	/*
+	 * delete lock file: a failed attempt must not keep the lock */
+	unlink(target);
+
+	/* file close */
+	close(fd);
+
+	return rc;
+}
+
+/*
+ *  lock_snapshot
+ *
+ *  take the snapshot lock of a directory by creating its lock file
+ *
+ *  \param[in]	target		target directory
+ *  \param[in]	cmd		sub command name(for error message)
+ *
+ *  \retval     0		success
+ *  \retval     not 0		error
+ */
+static int lock_snapshot(char *target, char *cmd)
+{
+	char lockpath[PATH_MAX + 1];
+	size_t need = strlen(target) + sizeof("/" SNAPSHOT_LOCK);
+	int rc;
+
+	/* "<target>/<lock name>" plus its NUL must fit in PATH_MAX bytes */
+	if (need > PATH_MAX) {
+		fprintf(stderr, SNAP_ERR_LFS_DIRLONG, cmd);
+		return -ENAMETOOLONG;
+	}
+	/* create path */
+	snprintf(lockpath, sizeof(lockpath), "%s/%s", target, SNAPSHOT_LOCK);
+
+	/* create lock file */
+	rc = lock_file_create(lockpath);
+	if (rc == -EEXIST)
+		fprintf(stderr, SNAP_ERR_LFS_CMDBUSY, cmd, target);
+	else if (rc != 0)
+		SNAP_ERR_LFS_OUT(rc, cmd,  __func__, __LINE__, target);
+
+	return rc;
+}
+
+/*
+ *  unlock_snapshot
+ *
+ *  drop the snapshot lock of a directory by deleting its lock file
+ *
+ *  \param[in]	target		target directory
+ *  \param[in]	cmd		sub command name(for error message)
+ *
+ *  \retval     0		success (also when no lock file exists)
+ *  \retval     not 0		error
+ */
+static int unlock_snapshot(char *target, char *cmd)
+{
+	char lockpath[PATH_MAX + 1];
+	int attempt;
+	int rc;
+
+	/* create path */
+	if (strlen(target) + sizeof("/" SNAPSHOT_LOCK) > PATH_MAX) {
+		fprintf(stderr, SNAP_ERR_LFS_DIRLONG, cmd);
+		return -ENAMETOOLONG;
+	}
+	snprintf(lockpath, sizeof(lockpath), "%s/%s", target, SNAPSHOT_LOCK);
+
+	/*
+	 * delete lock file: one initial try plus up to
+	 * SNAPSHOT_UNLOCK_RETRY_MAX retries, sleeping between attempts
+	 */
+	for (attempt = 0; ; attempt++) {
+		if (unlink(lockpath) != -1)
+			return 0;
+		/* an already missing lock file counts as unlocked */
+		if (errno == ENOENT)
+			return 0;
+
+		if (attempt >= SNAPSHOT_UNLOCK_RETRY_MAX)
+			break;
+		sleep(SNAPSHOT_UNLOCK_SLEEP_SEC);
+	}
+
+	rc = -errno;
+	SNAP_ERR_LFS_OUT(rc, cmd, __func__, __LINE__, lockpath);
+
+	return rc;
+}
+
+/*
+ *  get_snapshot_directory
+ *
+ *  store the current working directory in the caller's buffer.
+ *
+ *  \param[out]	val_d		buffer (getcwd() is given PATH_MAX as size)
+ *  \param[in]	cmd		sub command name(for error message)
+ *
+ *  \retval     0		success
+ *  \retval     -1		error
+ */
+static int get_snapshot_directory(char *val_d, char *cmd)
+{
+	if (getcwd(val_d, PATH_MAX) != NULL)
+		return 0;
+
+	/* a missing working directory gets its own message */
+	if (errno == ENOENT)
+		fprintf(stderr, SNAP_ERR_LFS_NOCDIR, cmd);
+	else
+		SNAP_ERR_LFS_OUT(-errno, cmd, __func__, __LINE__, "./");
+
+	return -1;
+}
+
+/*
+ *  check_snapshot_dir
+ *
+ *  issue LL_SNAPSHOT_STAT on a directory and hand back the result.
+ *
+ *  \param[in]	target		target snapshot directory
+ *
+ *  \retval     0		not snapshot directory
+ *  \retval     1		snapshot directory
+ *  \retval     not 0, 1	error code
+ */
+static int check_snapshot_dir(char *target)
+{
+	struct snapshot_data req;
+	DIR *dirp;
+	int dir_fd;
+	int rc;
+
+	/* open snapshot destination dir */
+	dirp = opendir(target);
+	if (dirp == NULL)
+		return -errno;
+
+	dir_fd = dirfd(dirp);
+	if (dir_fd == -1) {
+		rc = -errno;
+		goto out_close;
+	}
+
+	/* pack parameter */
+	memset(&req, 0x0, sizeof(req));
+	req.subcmd = LL_SNAPSHOT_STAT;
+
+	/*
+	 * snapshot ioctl: -1 is turned into -errno, any other result
+	 * is returned to the caller unchanged
+	 */
+	rc = ioctl(dir_fd, LL_IOC_SNAPSHOT, &req);
+	if (rc == -1)
+		rc = -errno;
+
+out_close:
+	/* close dest dir */
+	closedir(dirp);
+
+	return rc;
+}
+
+/*
+ *  check_snapshot_user
+ *
+ *  check snapshot delete user: root, or the owner of target/snapname.
+ *
+ *  \param[in]	target		target snapshot directory
+ *  \param[in]	snapname	snapshot name
+ *
+ *  \retval     0		not delete user
+ *  \retval     1		delete user
+ *  \retval     not 0, 1	error code (-errno)
+ */
+static int check_snapshot_user(char *target, char *snapname)
+{
+	char path[PATH_MAX + 1];
+	uid_t uid = geteuid();
+	struct stat st;
+	int len;
+
+	/* check root user */
+	if (uid == 0)
+		return 1;
+
+	/*
+	 * check user id; a truncated path could name a different file,
+	 * so refuse it instead of trusting that file's owner
+	 */
+	len = snprintf(path, PATH_MAX, "%s/%s", target, snapname);
+	if (len < 0 || len >= PATH_MAX)
+		return -ENAMETOOLONG;
+
+	if (lstat(path, &st) != 0)
+		return -errno;
+
+	return st.st_uid == uid ? 1 : 0;
+}
+
+/*
+ *  check_snapshot_enabled
+ *
+ *  issue LL_SNAPSHOT_CHKENABLED on a directory and hand back the result
+ *
+ *  \param[in]	target		target snapshot directory
+ *
+ *  \retval     0		disabled
+ *  \retval     1		enabled
+ *  \retval     not 0		error code
+ */
+static int check_snapshot_enabled(char *target)
+{
+	struct snapshot_data query;
+	DIR *dirp = opendir(target);
+	int dir_fd;
+	int rc;
+
+	/* open target path */
+	if (dirp == NULL)
+		return -errno;
+
+	dir_fd = dirfd(dirp);
+	if (dir_fd != -1) {
+		/* pack parameter */
+		memset(&query, 0x0, sizeof(query));
+		query.subcmd = LL_SNAPSHOT_CHKENABLED;
+
+		/*
+		 * query the enabled flag; any negative ioctl result is
+		 * replaced by -errno
+		 */
+		rc = ioctl(dir_fd, LL_IOC_SNAPSHOT, &query);
+		if (rc < 0)
+			rc = -errno;
+	} else {
+		/* no descriptor: skip the ioctl and return the errno */
+		rc = -errno;
+	}
+
+	/* close target path */
+	closedir(dirp);
+
+	return rc;
+}
+
+/*
+ *  create_post_snapshot
+ *
+ *  send the post create request (LL_SNAPSHOT_CREATE_POST) to the target
+ *
+ *  \param[in]	target		target snapshot directory
+ *
+ *  \retval     0		success
+ *  \retval     not 0		error code
+ */
+static int create_post_snapshot(char *target)
+{
+	struct snapshot_data req;
+	char *cmd = "create";
+	DIR *dirp;
+	int dir_fd;
+	int rc = 0;
+
+	/* open target path */
+	dirp = opendir(target);
+	if (dirp == NULL) {
+		rc = -errno;
+		SNAP_ERR_LFS_OUT(rc, cmd, __func__, __LINE__, target);
+		return rc;
+	}
+	dir_fd = dirfd(dirp);
+	if (dir_fd == -1) {
+		rc = -errno;
+		SNAP_ERR_LFS_OUT(rc, cmd, __func__, __LINE__, target);
+		goto out_close;
+	}
+
+	/* pack parameter */
+	memset(&req, 0x0, sizeof(req));
+	req.subcmd = LL_SNAPSHOT_CREATE_POST;
+
+	/* create snapshot(post create): only -1 counts as a failure,
+	 * every other ioctl result is reported as success (0)
+	 */
+	if (ioctl(dir_fd, LL_IOC_SNAPSHOT, &req) == -1) {
+		rc = -errno;
+		SNAP_ERR_LFS_OUT(rc, cmd, __func__, __LINE__, target);
+	}
+
+out_close:
+	/* close target path, on success and on every failure
+	 * that happens after opendir() */
+	closedir(dirp);
+
+	return rc;
+}
+
+/*
+ *  create_entry() - snapshot one file or directory (LL_IOC_SNAPSHOT ioctl)
+ *
+ *  \param[in]	src		source file/dir path name
+ *  \param[in]	dst		destination directory name
+ *  \param[in]	target		target file/dir name
+ *  \param[in]	hidden		hidden directory(1) or not(0)
+ *
+ *  \retval	0	success
+ *  \retval	not 0	failure
+ */
+int create_entry(char *src, char *dst, char *target, __u32 hidden)
+{
+	struct snapshot_data data;
+	DIR *p_dst_dir;
+	int src_fd = -1, dst_fd, rc = 0;
+	struct stat sb, psb;
+	size_t name_len = strlen(target);
+	char *cmd = "create";
+	bool warn_eacces = false, bit_cleared = false;
+
+	/* the name is copied into a fixed-size field of the request */
+	if (name_len >= sizeof(data.req.create.name)) {
+		SNAP_ERR_WARN_LFS_OUT(-ENAMETOOLONG, cmd,  __func__,
+				 __LINE__, src, warn_eacces);
+		return -ENAMETOOLONG;
+	}
+
+	/* open snapshot original file or dir */
+	if (hidden == 0) {
+		/* called from create_child_snapshots() */
+		warn_eacces = true;
+		src_fd = open(src, O_RDONLY);
+		if (src_fd == -1) {
+			rc = -errno;	/* the report may clobber errno */
+			SNAP_ERR_WARN_LFS_OUT(rc, cmd,  __func__,
+					 __LINE__, src, warn_eacces);
+			return rc;
+		}
+	}
+
+	/* get attribute spec */
+	rc = lstat(src, &sb);
+	if (rc) {
+		rc = -errno;
+		SNAP_ERR_WARN_LFS_OUT(rc, cmd,  __func__,
+				 __LINE__, src, warn_eacces);
+		goto out_src_close;
+	}
+
+	/* open snapshot destination dir */
+	p_dst_dir = opendir(dst);
+	if (!p_dst_dir) {
+		rc = -errno;
+		SNAP_ERR_WARN_LFS_OUT(rc, cmd,  __func__,
+				 __LINE__, dst, warn_eacces);
+		goto out_src_close;
+	}
+	dst_fd = dirfd(p_dst_dir);
+	if (dst_fd == -1) {
+		rc = -errno;
+		SNAP_ERR_WARN_LFS_OUT(rc, cmd,  __func__,
+				 __LINE__, dst, warn_eacces);
+		goto out_dst_close;
+	}
+
+	/* pack parameter */
+	memset(&data, 0x0, sizeof(data));
+	data.subcmd = LL_SNAPSHOT_CREATE;
+	data.req.create.src_fd = src_fd;
+	memcpy(data.req.create.name, target, name_len + 1);
+	data.req.create.name_len = name_len;
+	data.req.create.hidden_f = hidden;
+	data.req.create.mode = sb.st_mode;
+
+	if ((sb.st_mode & SFBITS_MASK) && S_ISREG(sb.st_mode)) {
+		if (geteuid() != sb.st_uid) {
+			bit_cleared = true;
+		} else {
+			rc = fstat(dst_fd, &psb);
+			if (rc) {
+				rc = -errno;
+				SNAP_ERR_WARN_LFS_OUT(rc, cmd,  __func__,
+						 __LINE__, src, warn_eacces);
+				goto out_dst_close;
+			}
+			bit_cleared = (sb.st_gid != ((psb.st_mode & S_ISGID)
+						    ? psb.st_gid : getegid()));
+		}
+		if (bit_cleared)
+			data.req.create.mode &= ~SFBITS_MASK;
+	}
+
+	if (hidden == 0x1) {
+		data.req.create.uid = sb.st_uid;
+		data.req.create.gid = sb.st_gid;
+	} else if (hidden == 0) {
+		data.req.create.atim = sb.st_atim;
+		data.req.create.mtim = sb.st_mtim;
+	}
+
+	/* create snapshot */
+	rc = ioctl(dst_fd, LL_IOC_SNAPSHOT, &data);
+	if (rc == -1) {
+		rc = -errno;
+		if ((hidden == 0) || (rc != -EEXIST))
+			SNAP_ERR_WARN_LFS_OUT(rc, cmd,  __func__,
+					 __LINE__, src, warn_eacces);
+	} else if (bit_cleared) {
+		fprintf(stderr, SNAP_WRN_LFS_BITCLEARED, cmd,
+			sb.st_mode & 07777, dst, target);
+	}
+
+out_dst_close:
+	/* close dest dir */
+	closedir(p_dst_dir);
+
+out_src_close:
+	/* close src */
+	if (src_fd != -1)
+		close(src_fd);
+
+	return rc;
+}
+
+/*
+ *  create_child_snapshots()
+ *
+ *  Recursively create snapshot entries for the contents of a directory.
+ *
+ *  Walks \a src and calls create_entry() for every regular file and
+ *  directory found, creating the entry under \a dst.  Directories are
+ *  then descended into.  Entries of any other type, directories that are
+ *  not on the filesystem identified by param->top_fsuuid, and entries
+ *  failing with EACCES are skipped with a warning.  The SNAPSHOT_DIR and
+ *  SNAPSHOT_LOCK entries are skipped silently.
+ *
+ *  Both path buffers are extended in place while walking and are cut back
+ *  to their original length before returning.  The callers in this file
+ *  pass buffers of PATH_MAX + 1 bytes; the length checks rely on that.
+ *
+ *  \param[in/out]	src	original directory (work buffer)
+ *  \param[in/out]	dst	snapshot directory (work buffer)
+ *  \param[in]	param	snapshot create parameter
+ *
+ *  \retval     0	success
+ *  \retval     not 0	error code
+ */
+static int create_child_snapshots(char *src, char *dst,
+				  struct create_param *param)
+{
+	char *cmd = "create";
+	DIR *dir_p;
+	struct dirent *de;
+	struct statfs fs;
+	char *src_end, *dst_end;
+	size_t name_len;
+	int rc = 0;
+
+	/* check path max */
+	if (strlen(dst) + 1 > PATH_MAX) {
+		fprintf(stderr, SNAP_ERR_LFS_DIRLONG, cmd);
+		return -ENAMETOOLONG;
+	}
+
+	/* open parent directory */
+	dir_p = opendir(src);
+	if (dir_p == NULL) {
+		/* save errno before fprintf() can clobber it */
+		rc = -errno;
+		SNAP_ERR_LFS_OUT(rc, cmd,  __func__, __LINE__, src);
+		return rc;
+	}
+
+	/* create files */
+	src_end = src + strlen(src);
+	dst_end = dst + strlen(dst);
+	while ((de = readdir(dir_p)) != NULL) {
+
+		/* ignore ".", ".." directory */
+		if (strcmp(".", de->d_name) == 0 ||
+		    strcmp("..", de->d_name) == 0)
+			continue;
+
+		/* skip .l_snapshot directory */
+		if (strcmp(SNAPSHOT_DIR, de->d_name) == 0)
+			continue;
+
+		/* skip .l_snapshot.lock file */
+		if (strcmp(SNAPSHOT_LOCK, de->d_name) == 0)
+			continue;
+
+		/* delete old entry name */
+		*src_end = '\0';
+		*dst_end = '\0';
+
+		/* check path max for both work buffers */
+		name_len = strlen(de->d_name);
+		if (strlen(dst) + name_len + 2 > PATH_MAX ||
+		    strlen(src) + name_len + 2 > PATH_MAX) {
+			fprintf(stderr, SNAP_ERR_LFS_DIRLONG, cmd);
+			/* leave through the common cleanup below */
+			rc = -ENAMETOOLONG;
+			break;
+		}
+
+		/* make source path */
+		strncat(src, "/", 1);
+		strncat(src, de->d_name, name_len);
+
+		/* check file type */
+		if ((de->d_type != DT_REG) && (de->d_type != DT_DIR)) {
+			fprintf(stdout, SNAP_WRN_LFS_FNOTSUP, cmd, src);
+			continue;
+		}
+
+		/* create snapshot directory, files */
+		if (de->d_type == DT_REG) {
+
+			/* create regular file; EACCES was already warned */
+			rc = create_entry(src, dst, de->d_name, 0);
+			if (rc == -EACCES) {
+				rc = 0;
+				continue;
+			} else if (rc)
+				break;
+
+		} else if (de->d_type == DT_DIR) {
+			char tgt_fsuuid[sizeof(struct obd_uuid)];
+			/* check fs type is lustre , skip another fs */
+			rc = statfs(src, &fs);
+			if (rc) {
+				rc = -errno;
+				if (rc == -EACCES) {
+					fprintf(stderr, SNAP_WRN_LFS_EACCES,
+						cmd, src);
+					rc = 0;
+					continue;
+				}
+				SNAP_ERR_LFS_OUT(rc, cmd, __func__,
+						 __LINE__, src);
+				break;
+			}
+			if (fs.f_type != LL_SUPER_MAGIC) {
+				fprintf(stdout, SNAP_WRN_LFS_ANOTFS,
+					cmd, src);
+				continue;
+			}
+			rc = llapi_getname(src, tgt_fsuuid, sizeof(tgt_fsuuid));
+			if (rc == -EACCES) {
+				fprintf(stderr, SNAP_WRN_LFS_EACCES,
+					cmd, src);
+				rc = 0;
+				continue;
+			} else if (rc < 0) {
+				SNAP_ERR_LFS_OUT(rc, cmd,
+						 __func__, __LINE__, src);
+				break;
+			}
+			/* a non-negative result must not leak out as error */
+			rc = 0;
+			if (strcmp(param->top_fsuuid, tgt_fsuuid) != 0) {
+				fprintf(stdout, SNAP_WRN_LFS_ANOTFS,
+					cmd, src);
+				continue;
+			}
+
+			/* create directory */
+			rc = create_entry(src, dst, de->d_name, 0);
+			if (rc == -EACCES) {
+				rc = 0;
+				continue;
+			} else if (rc)
+				break;
+
+			/* create dst path and descend */
+			strncat(dst, "/", 1);
+			strncat(dst, de->d_name, name_len);
+			rc = create_child_snapshots(src, dst, param);
+			if (rc)
+				break;
+		}
+	}
+
+	/* close parent directory */
+	closedir(dir_p);
+
+	/* give back path */
+	*src_end = '\0';
+	*dst_end = '\0';
+
+	return rc;
+}
+static int delete_entry(char *parent, char *target);
+/*
+ *  create_hidden_dir()
+ *
+ *  Create the SNAPSHOT_DIR directory under the original directory (unless
+ *  a valid one already exists) and then the snapshot name directory
+ *  inside it.  If the name directory cannot be created for a reason other
+ *  than EEXIST and SNAPSHOT_DIR was created by this call, SNAPSHOT_DIR is
+ *  removed again.
+ *
+ *  \param[in]	dir		original directory name
+ *  \param[in]	snap_name	snapshot name
+ *  \param[out]	snap_dir	receives "<dir>/<SNAPSHOT_DIR>/<snap_name>"
+ *
+ *  \retval     0		success
+ *  \retval     not 0		error code
+ */
+static int create_hidden_dir(char *dir, char *snap_name, char *snap_dir)
+{
+	char		*cmd = "create";
+	char		parent[PATH_MAX + 1], name[PATH_MAX + 1];
+	char		hidden_path[PATH_MAX + 1];
+	int		made_hidden = 0;
+	int		rc;
+
+	/* dir + "/.l_snapshot/" + snap name must fit into PATH_MAX */
+	if (strlen(dir) + sizeof("/" SNAPSHOT_DIR "/") + strlen(snap_name)
+								> PATH_MAX) {
+		fprintf(stderr, SNAP_ERR_LFS_DIRLONG, cmd);
+		return -ENAMETOOLONG;
+	}
+
+	/* step 1: "<dir>/.l_snapshot" */
+	snprintf(parent, sizeof(parent), "%s", dir);
+	snprintf(name, sizeof(name), "%s", SNAPSHOT_DIR);
+
+	rc = create_entry(dir, parent, name, 0x1);
+	switch (rc) {
+	case 0:
+		/* remember that we own the new hidden directory */
+		made_hidden = 1;
+		break;
+	case -EEXIST:
+		/* something is already there: accept only a snapshot dir */
+		snprintf(hidden_path, PATH_MAX, "%s/%s", parent, name);
+		rc = check_snapshot_dir(hidden_path);
+		if (rc < 0) {
+			SNAP_ERR_LFS_OUT(rc, cmd, __func__, __LINE__,
+					 hidden_path);
+			return rc;
+		}
+		if (rc != 1) {
+			fprintf(stderr, SNAP_ERR_LFS_INVSDIR, cmd,
+				hidden_path);
+			return -EINVAL;
+		}
+		break;
+	default:
+		return rc;
+	}
+
+	/* step 2: "<dir>/.l_snapshot/<snap_name>" */
+	snprintf(parent, sizeof(parent), "%s/%s", dir, SNAPSHOT_DIR);
+	snprintf(name, sizeof(name), "%s", snap_name);
+
+	/* hand the full snapshot directory path back to the caller */
+	snprintf(snap_dir, PATH_MAX + 1, "%s/%s", parent, name);
+
+	rc = create_entry(dir, parent, name, 0x2);
+	if (rc == -EEXIST) {
+		fprintf(stderr, SNAP_ERR_LFS_ALSNAP, cmd, snap_name);
+		return rc;
+	}
+	if (rc != 0 && made_hidden) {
+		/* rmdir .l_snapshot */
+		delete_entry(dir, SNAPSHOT_DIR);
+	}
+	return rc;
+}
+
+/*
+ *  create_snapshot()
+ *
+ *  Create the hidden directory and the snapshot name directory, then
+ *  create the snapshot entries recursively.  create_post_snapshot() is
+ *  called on every path: with the original directory when the hidden
+ *  directory step fails, with the snapshot directory otherwise.
+ *
+ *  \param[in/out]	val_d	original directory name
+ *  \param[in/out]	val_s	snapshot name
+ *  \param[in/out]	param	snapshot create parameter
+ *
+ *  \retval     0		success
+ *  \retval     not 0		error code
+ */
+int create_snapshot(char *val_d, char *val_s,
+		    struct create_param *param)
+{
+	char snap_dir[PATH_MAX + 1];
+	int post_rc;
+	int rc;
+
+	/* snapshot hidden directory */
+	rc = create_hidden_dir(val_d, val_s, snap_dir);
+	if (rc != 0) {
+		create_post_snapshot(val_d);
+		return rc;
+	}
+
+	/* create snapshot recursively, then always run the post step */
+	rc = create_child_snapshots(val_d, snap_dir, param);
+	post_rc = create_post_snapshot(snap_dir);
+
+	/* a failure of the recursive step wins over the post step result */
+	return rc != 0 ? rc : post_rc;
+}
+
+/*
+ *  lfs_snapshot_create
+ *
+ *  lfs snapshot create command
+ *
+ *  Options: -s <snapshot name> (default "sYYYYMMDD_HHMMSS" from the local
+ *  time), -d <directory> (default taken from get_snapshot_directory()).
+ *  The directory must exist, be a directory on Lustre with snapshots
+ *  enabled, and - unless the effective uid is 0 - have all three owner
+ *  permission bits (S_IRWXU) set.  The snapshot is created while holding
+ *  the snapshot command lock.
+ *
+ *  \param[in]	argc		number of arguments
+ *  \param[in]	argv		array of arguments
+ *
+ *  \retval     0		success
+ *  \retval     1		error
+ *  \retval     4(=CMD_HELP)	usage
+ */
+int lfs_snapshot_create(int argc, char **argv)
+{
+	char *cmd = "create";
+	int flag_s = 0, flag_d = 0;
+	char val_s[SNAPSHOT_SNAPNAME_MAX + 1], val_d[PATH_MAX + 1];
+	struct stat sb;
+	struct statfs fs;
+	struct create_param param;
+	int c;
+	int rc;
+
+	/* get option */
+	while ((c = getopt(argc, argv, "s:d:")) != -1) {
+		switch (c) {
+		case 's':
+			if (flag_s == 1)
+				return CMD_HELP;
+			if (strlen(optarg) > SNAPSHOT_SNAPNAME_MAX) {
+				fprintf(stderr, SNAP_ERR_LFS_SNAPLONG, cmd);
+				return 1;
+			}
+			/* length was checked above, the copy cannot overflow */
+			strncpy(val_s, optarg, strlen(optarg)+1);
+			flag_s = 1;
+			break;
+		case 'd':
+			if (flag_d == 1)
+				return CMD_HELP;
+			if (strlen(optarg) + 1 > PATH_MAX) {
+				fprintf(stderr, SNAP_ERR_LFS_DIRLONG, cmd);
+				return 1;
+			}
+			strncpy(val_d, optarg, strlen(optarg)+1);
+			flag_d = 1;
+			break;
+		default:
+			return CMD_HELP;
+		}
+	}
+
+	/* check not need option */
+	if (optind < argc)
+		return CMD_HELP;
+
+	/* check -s option */
+	if (flag_s == 0) {
+		time_t now;
+		struct tm *tm;
+
+		/* set default name "sYYYYMMDD_HHMMSS" */
+		time(&now);
+		tm = localtime(&now);
+		if (tm == NULL) {
+			/* localtime() can fail; do not dereference NULL */
+			SNAP_ERR_LFS_OUT(-EOVERFLOW, cmd, __func__,
+					 __LINE__, "localtime");
+			return 1;
+		}
+		snprintf(val_s, sizeof(val_s), "s%04d%02d%02d_%02d%02d%02d",
+			tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
+			tm->tm_hour, tm->tm_min, tm->tm_sec);
+	} else {
+		const char *p;
+
+		/* check "" */
+		if (val_s[0] == '\0') {
+			fprintf(stderr, SNAP_ERR_LFS_INVSNAP, cmd);
+			return 1;
+		}
+
+		/* check alpha or num or "_" */
+		for (p = val_s; *p != '\0'; p++) {
+			/* <ctype.h> functions need an unsigned char value */
+			if (!isalnum((unsigned char)*p) && (*p != '_')) {
+				fprintf(stderr, SNAP_ERR_LFS_INVSNAP, cmd);
+				return 1;
+			}
+		}
+	}
+
+	/* check -d option */
+	if (flag_d == 0) {
+		/* get current directory */
+		rc = get_snapshot_directory(val_d, cmd);
+		if (rc)
+			return 1;
+	}
+	/* check existence of source directory */
+	rc = lstat(val_d, &sb);
+	if (rc < 0) {
+		rc = -errno;
+		if (rc == -ENAMETOOLONG)
+			fprintf(stderr, SNAP_ERR_LFS_NAMELONG, cmd);
+		else
+			SNAP_ERR_LFS_OUT(rc, cmd,  __func__, __LINE__, val_d);
+		return 1;
+	}
+	/* check source dir is directory */
+	if (!S_ISDIR(sb.st_mode)) {
+		fprintf(stderr, SNAP_ERR_LFS_NOTDIR, cmd, val_d);
+		return 1;
+	}
+
+	/* non-root callers need rwx owner bits on the directory */
+	if ((geteuid() != 0) &&
+	    ((sb.st_mode & S_IRWXU) != S_IRWXU)) {
+		fprintf(stderr, SNAP_WRN_LFS_EACCES, cmd, val_d);
+		return 1;
+	}
+
+	/* get filesystem type */
+	rc = statfs(val_d, &fs);
+	if (rc) {
+		rc = -errno;
+		SNAP_ERR_LFS_OUT(rc, cmd,  __func__, __LINE__, val_d);
+		return 1;
+	}
+	/* check lustre filesystem type MAGIC code */
+	if (fs.f_type != LL_SUPER_MAGIC) {
+		fprintf(stderr, SNAP_ERR_LFS_NOTLUSTRE, cmd, val_d);
+		return 1;
+	}
+	/* remember the filesystem of the top directory for the walk */
+	rc = llapi_getname(val_d, param.top_fsuuid,
+			   sizeof(param.top_fsuuid));
+	if (rc < 0) {
+		SNAP_ERR_LFS_OUT(rc, cmd,  __func__, __LINE__, val_d);
+		return 1;
+	}
+
+	/* check snapshot flag */
+	rc = check_snapshot_enabled(val_d);
+	if (rc != 1) {
+		if (rc == 0)
+			fprintf(stderr, SNAP_ERR_LFS_SNAPOFF, cmd);
+		else
+			SNAP_ERR_LFS_OUT(rc, cmd,  __func__, __LINE__, val_d);
+		return 1;
+	}
+
+	/* snapshot command lock */
+	rc = lock_snapshot(val_d, cmd);
+	if (rc)
+		return 1;
+
+	/* create snapshot */
+	rc = create_snapshot(val_d, val_s, &param);
+	if (rc) {
+		unlock_snapshot(val_d, cmd);
+		return 1;
+	}
+
+	/* snapshot command unlock */
+	rc = unlock_snapshot(val_d, cmd);
+	if (rc)
+		return 1;
+
+	return 0;
+}
+
+/*
+ *  delete_entry
+ *
+ *  Remove one snapshot entry by issuing an LL_SNAPSHOT_UNLINK request
+ *  through the LL_IOC_SNAPSHOT ioctl on the parent directory.
+ *
+ *  No message is printed when removing SNAPSHOT_DIR fails with ENOTEMPTY;
+ *  the error code is still returned.
+ *
+ *  \param[in]	parent		directory containing the entry
+ *  \param[in]	target		name of the entry to delete
+ *
+ *  \retval     0		success
+ *  \retval     not 0		error code
+ */
+static int delete_entry(char *parent, char *target)
+{
+	char *cmd = "delete";
+	struct snapshot_data req;
+	DIR *parent_dir;
+	int parent_fd;
+	int rc = 0;
+
+	parent_dir = opendir(parent);
+	if (!parent_dir) {
+		rc = -errno;
+		SNAP_ERR_LFS_OUT(rc, cmd, __func__, __LINE__, parent);
+		return rc;
+	}
+
+	parent_fd = dirfd(parent_dir);
+	if (parent_fd < 0) {
+		rc = -errno;
+		SNAP_ERR_LFS_OUT(rc, cmd, __func__, __LINE__, parent);
+		closedir(parent_dir);
+		return rc;
+	}
+
+	/* build the unlink request (the name lives in the create member) */
+	memset(&req, 0x0, sizeof(req));
+	req.subcmd = LL_SNAPSHOT_UNLINK;
+	strncpy(req.req.create.name, target, strlen(target)+1);
+	req.req.create.name_len = strlen(req.req.create.name);
+
+	rc = ioctl(parent_fd, LL_IOC_SNAPSHOT, &req);
+	if (rc < 0) {
+		rc = -errno;
+		if (rc == -ENOENT) {
+			fprintf(stderr, SNAP_ERR_LFS_NOTSNAPD, cmd, target);
+		} else if (!(strcmp(SNAPSHOT_DIR, target) == 0 &&
+			     rc == -ENOTEMPTY)) {
+			/* stay quiet only for a non-empty .l_snapshot */
+			SNAP_ERR_LFS_OUT(rc, cmd, __func__,
+					 __LINE__, target);
+		}
+	}
+
+	closedir(parent_dir);
+	return rc;
+}
+
+/*
+ *  delete_child_snapshots
+ *
+ *  Delete everything below "<parent>/<target>": regular files are removed
+ *  with delete_entry(), directories are emptied recursively and then
+ *  removed.  "<parent>/<target>" itself is left for the caller to remove.
+ *  Any other entry type stops the walk with -ENOTSUP.
+ *
+ *  \a parent is used as a work buffer: the target name is appended to it
+ *  while walking and cut off again before returning.  The callers in this
+ *  file pass a buffer of PATH_MAX + 1 bytes.
+ *
+ *  \param[in/out]	parent	delete parent directory (work buffer)
+ *  \param[in]		target	delete target snapshot directory
+ *
+ *  \retval     0		success
+ *  \retval     not 0		error code
+ */
+static int delete_child_snapshots(char *parent, char *target)
+{
+	DIR *dir_p;
+	struct dirent *de;
+	char *path, *path_end, *cmd = "delete";
+	int rc = 0;
+
+	/* create path */
+	path = parent;
+	path_end = parent + strlen(parent);
+	if (strlen(parent) + strlen(target) + 2 > PATH_MAX) {
+		fprintf(stderr, SNAP_ERR_LFS_DIRLONG, cmd);
+		return -ENAMETOOLONG;
+	}
+	strncat(path, "/", 1);
+	strncat(path, target, strlen(target));
+
+	/* open target directory */
+	dir_p = opendir(path);
+	if (dir_p == NULL) {
+		rc = -errno;
+		SNAP_ERR_LFS_OUT(rc, cmd, __func__, __LINE__, path);
+		/* give back path on the error path as well */
+		*path_end = '\0';
+		return rc;
+	}
+
+	/* delete snapshots */
+	while ((de = readdir(dir_p)) != NULL) {
+
+		if ((strcmp(".", de->d_name) == 0 ||
+		     strcmp("..", de->d_name) == 0))
+			continue;
+
+		/* check path max */
+		if (strlen(path) + strlen(de->d_name) + 2 > PATH_MAX) {
+			fprintf(stderr, SNAP_ERR_LFS_DIRLONG, cmd);
+			/* leave through the common cleanup below */
+			rc = -ENAMETOOLONG;
+			break;
+		}
+
+		if (de->d_type == DT_REG) {
+			/* delete regular file */
+			rc = delete_entry(path, de->d_name);
+			if (rc)
+				break;
+		} else if (de->d_type == DT_DIR) {
+			/* delete snapshots of child directory */
+			rc = delete_child_snapshots(path, de->d_name);
+			if (rc)
+				break;
+
+			/* delete directory */
+			rc = delete_entry(path, de->d_name);
+			if (rc)
+				break;
+		} else {
+			rc = -ENOTSUP;
+			break;
+		}
+	}
+
+	/* close target directory */
+	closedir(dir_p);
+
+	/* give back path */
+	*path_end = '\0';
+
+	return rc;
+}
+
+/*
+ *  delete_snapshot
+ *
+ *  Delete one snapshot: its files, its snapshot name directory and, when
+ *  it becomes empty, the SNAPSHOT_DIR directory itself.
+ *
+ *  \param[in]	d_val		directory that owns the snapshot
+ *  \param[in]	s_val		name of the snapshot to delete
+ *
+ *  \retval     0		success
+ *  \retval     not 0		error code
+ */
+static int delete_snapshot(char *d_val, char *s_val)
+{
+	char *cmd = "delete";
+	char hidden_dir[PATH_MAX + 1];
+	int rc;
+
+	/* "<d_val>/.l_snapshot" must exist and be a snapshot directory */
+	snprintf(hidden_dir, PATH_MAX, "%s/%s", d_val, SNAPSHOT_DIR);
+	rc = check_snapshot_dir(hidden_dir);
+	if (rc == -ENOENT) {
+		fprintf(stderr, SNAP_ERR_LFS_NOTSNAPD, cmd, s_val);
+		return rc;
+	} else if (rc < 0) {
+		SNAP_ERR_LFS_OUT(rc, cmd,  __func__, __LINE__, hidden_dir);
+		return rc;
+	} else if (rc != 1) {
+		fprintf(stderr, SNAP_ERR_LFS_INVSDIR, cmd, hidden_dir);
+		return -EINVAL;
+	}
+
+	/* the caller must be allowed to touch this snapshot */
+	rc = check_snapshot_user(hidden_dir, s_val);
+	if (rc == 0) {
+		fprintf(stderr, SNAP_ERR_LFS_OPEPRMIT, cmd, s_val);
+		return -EPERM;
+	} else if (rc == -ENOENT) {
+		fprintf(stderr, SNAP_ERR_LFS_NOTSNAPD, cmd, s_val);
+		return rc;
+	} else if (rc != 1) {
+		SNAP_ERR_LFS_OUT(rc, cmd,  __func__, __LINE__, s_val);
+		return rc;
+	}
+
+	/* contents first, then the snapshot name directory */
+	rc = delete_child_snapshots(hidden_dir, s_val);
+	if (rc != 0)
+		return rc;
+
+	rc = delete_entry(hidden_dir, s_val);
+	if (rc != 0)
+		return rc;
+
+	/* .l_snapshot still holding other snapshots is not an error */
+	rc = delete_entry(d_val, SNAPSHOT_DIR);
+	return (rc == -ENOTEMPTY) ? 0 : rc;
+}
+
+/*
+ *  lfs_snapshot_delete
+ *
+ *  lfs snapshot delete command
+ *
+ *  Options: -s <snapshot name> (required), -d <directory> (default taken
+ *  from get_snapshot_directory()), -f (skip the confirmation prompt).
+ *  Answering "no" at the prompt ends the command with status 0 and
+ *  deletes nothing.  The deletion runs while holding the snapshot
+ *  command lock.
+ *
+ *  \param[in]	argc		number of arguments
+ *  \param[in]	argv		array of arguments
+ *
+ *  \retval     0		success
+ *  \retval     1		error
+ *  \retval     4(=CMD_HELP)	usage
+ */
+int lfs_snapshot_delete(int argc, char **argv)
+{
+	char *cmd = "delete";
+	char val_s[SNAPSHOT_SNAPNAME_MAX + 1], val_d[PATH_MAX + 1];
+	int flag_s = 0, flag_d = 0, flag_f = 0;
+	struct stat sb;
+	struct statfs fs;
+	const char *p;
+	int c;
+	int rc;
+
+	/* get option */
+	while ((c = getopt(argc, argv, "s:d:f")) != -1) {
+		switch (c) {
+		case 's':
+			if (flag_s == 1)
+				return CMD_HELP;
+			if (strlen(optarg) > SNAPSHOT_SNAPNAME_MAX) {
+				fprintf(stderr, SNAP_ERR_LFS_SNAPLONG, cmd);
+				return 1;
+			}
+			/* length was checked above, the copy cannot overflow */
+			strncpy(val_s, optarg, strlen(optarg)+1);
+			flag_s = 1;
+			break;
+		case 'd':
+			if (flag_d == 1)
+				return CMD_HELP;
+			if (strlen(optarg) + 1 > PATH_MAX) {
+				fprintf(stderr, SNAP_ERR_LFS_DIRLONG, cmd);
+				return 1;
+			}
+			strncpy(val_d, optarg, strlen(optarg)+1);
+			flag_d = 1;
+			break;
+		case 'f':
+			if (flag_f == 1)
+				return CMD_HELP;
+			flag_f = 1;
+			break;
+		default:
+			return CMD_HELP;
+		}
+	}
+
+	/* check not need option */
+	if (optind < argc)
+		return CMD_HELP;
+
+	/* -s is mandatory for delete */
+	if (flag_s == 0)
+		return CMD_HELP;
+
+	/* check "" */
+	if (val_s[0] == '\0') {
+		fprintf(stderr, SNAP_ERR_LFS_INVSNAP, cmd);
+		return 1;
+	}
+
+	/* check alpha or num or "_" */
+	for (p = val_s; *p != '\0'; p++) {
+		/* <ctype.h> functions need an unsigned char value */
+		if (!isalnum((unsigned char)*p) && (*p != '_')) {
+			fprintf(stderr, SNAP_ERR_LFS_INVSNAP, cmd);
+			return 1;
+		}
+	}
+
+	/* check -d option, get current directory */
+	if (flag_d == 0) {
+		/* get current directory */
+		rc = get_snapshot_directory(val_d, cmd);
+		if (rc)
+			return 1;
+	}
+
+	/* check existence of source directory */
+	rc = lstat(val_d, &sb);
+	if (rc < 0) {
+		rc = -errno;
+		if (rc == -ENAMETOOLONG)
+			fprintf(stderr, SNAP_ERR_LFS_NAMELONG, cmd);
+		else
+			SNAP_ERR_LFS_OUT(rc, cmd,  __func__, __LINE__, val_d);
+		return 1;
+	}
+	/* check source dir is a directory */
+	if (!S_ISDIR(sb.st_mode)) {
+		fprintf(stderr, SNAP_ERR_LFS_NOTDIR, cmd, val_d);
+		return 1;
+	}
+
+	/* get filesystem type */
+	rc = statfs(val_d, &fs);
+	if (rc) {
+		rc = -errno;
+		SNAP_ERR_LFS_OUT(rc, cmd,  __func__, __LINE__, val_d);
+		return 1;
+	}
+	/* check lustre filesystem by MAGIC code */
+	if (fs.f_type != LL_SUPER_MAGIC) {
+		fprintf(stderr, SNAP_ERR_LFS_NOTLUSTRE, cmd, val_d);
+		return 1;
+	}
+
+	/* check snapshot enable */
+	rc = check_snapshot_enabled(val_d);
+	if (rc != 1) {
+		if (rc == 0)
+			fprintf(stderr, SNAP_ERR_LFS_SNAPOFF, cmd);
+		else
+			SNAP_ERR_LFS_OUT(rc, cmd,  __func__, __LINE__, val_d);
+		return 1;
+	}
+
+	/* check -f option, confirmation */
+	if (flag_f == 0) {
+		if (snapshot_cofirm(SNAP_MSG_CONFIRM_DELETE) == 0)
+			return 0;
+	}
+
+	/* snapshot command lock */
+	rc = lock_snapshot(val_d, cmd);
+	if (rc)
+		return 1;
+
+	/* snapshot delete files */
+	rc = delete_snapshot(val_d, val_s);
+	if (rc) {
+		unlock_snapshot(val_d, cmd);
+		return 1;
+	}
+
+	/* snapshot command unlock */
+	rc = unlock_snapshot(val_d, cmd);
+	if (rc)
+		return 1;
+
+	return 0;
+}
+
+/*
+ *  list_print_entry
+ *
+ *  Print one line per sub-directory of \a target: name, owner:group,
+ *  change time (st_ctime), an optional "*" lock mark and the resolved
+ *  path of the directory that contains \a target.  Sub-directories the
+ *  caller cannot read are reported with SNAP_MSG_NOTOPENDIR and are
+ *  counted as well.
+ *
+ *  \param[in]		target		list target directory
+ *  \param[in/out]	count		counter
+ *  \param[in]		flag_lock	lockflag
+ *
+ *  \retval		0		success
+ *  \retval		not 0		error code
+ */
+static int list_print_entry(char *target, int *count, int flag_lock)
+{
+	DIR *dir_p;
+	struct dirent *de;
+	char path[PATH_MAX + 1];
+	char parent[PATH_MAX + 1], *parent_end;
+	struct stat st;
+	struct passwd *pwd;
+	struct group *grp;
+	int len;
+	struct tm *tm;
+	int rc = 0;
+
+	/* open target directory */
+	dir_p = opendir(target);
+	if (dir_p == NULL) {
+		rc = -errno;
+		SNAP_ERR_WARN_LFS_OUT(rc, "list",
+				      __func__,
+				      __LINE__, target,
+				      true);
+		return rc;
+	}
+
+	while ((de = readdir(dir_p)) != NULL) {
+		if ((strcmp(".", de->d_name) == 0 ||
+		     strcmp("..", de->d_name) == 0))
+			continue;
+		if (de->d_type != DT_DIR)
+			continue;
+
+		/* check user permission */
+		snprintf(path, PATH_MAX, "%s/%s", target, de->d_name);
+		if (access(path, R_OK) != 0) {
+			fprintf(stdout, SNAP_MSG_NOTOPENDIR, path);
+			(*count)++;
+			continue;
+		}
+
+		/* get directory stat */
+		if (lstat(path, &st) != 0) {
+			rc = -errno;
+			SNAP_ERR_WARN_LFS_OUT(rc, "list",
+					      __func__,
+					      __LINE__, path,
+					      true);
+			/* leave through closedir() below */
+			break;
+		}
+
+		/* print snapshot name */
+		fprintf(stdout, "%-16s ", de->d_name);
+
+		/* print user name, or the numeric id if it is unknown */
+		pwd = getpwuid(st.st_uid);
+		if (pwd != NULL && pwd->pw_name != NULL) {
+			fprintf(stdout, "%s:", (char *)pwd->pw_name);
+			len = strlen(pwd->pw_name) + 1;
+		} else {
+			fprintf(stdout, "%d:", st.st_uid);
+			/* (userid's column) + ":" */
+			len = snapshot_digit(st.st_uid) + 1;
+		}
+
+		/* print group name, or the numeric id if it is unknown */
+		grp = getgrgid(st.st_gid);
+		if (grp != NULL && grp->gr_name != NULL) {
+			fprintf(stdout, "%s ", grp->gr_name);
+			len += strlen(grp->gr_name) + 1;
+		} else {
+			fprintf(stdout, "%d ", st.st_gid);
+			/* (groupid's column) + space */
+			len += snapshot_digit(st.st_gid) + 1;
+		}
+
+		/* pad the owner:group column to 32 characters */
+		if (len < 32)
+			fprintf(stdout, "%*s", 32 - len, " ");
+
+		/* print create time */
+		tm = localtime(&(st.st_ctime));
+		if (tm != NULL)
+			fprintf(stdout, "%04d/%02d/%02d %02d:%02d:%02d ",
+						tm->tm_year + 1900,
+						tm->tm_mon + 1,
+						tm->tm_mday,
+						tm->tm_hour,
+						tm->tm_min,
+						tm->tm_sec);
+		else
+			/* keep the column width when the time is unusable */
+			fprintf(stdout, "%-19s ", "-");
+
+		/* print lock status */
+		if (flag_lock)
+			fprintf(stdout, "*");
+
+		/* print directory: target without its last component */
+		strncpy(parent, target, PATH_MAX + 1);
+		parent[PATH_MAX] = '\0';
+		parent_end = strrchr(parent, '/');
+		if (parent_end != NULL)
+			*parent_end = '\0';
+		/* path is free for reuse here as the realpath() result */
+		if (realpath(parent, path) != NULL)
+			fprintf(stdout, "%s\n", path);
+		else
+			fprintf(stdout, "%s\n", parent);
+
+		(*count)++;
+	}
+
+	/* the directory stream was never closed before */
+	closedir(dir_p);
+
+	return rc;
+}
+
+/*
+ *  list_print_snapshot
+ *
+ *  Search \a target for snapshot directories and print them.
+ *
+ *  For every sub-directory on a filesystem of type \a fstype: a
+ *  SNAPSHOT_DIR entry accepted by check_snapshot_dir() is printed with
+ *  list_print_entry(); any other directory is descended into when
+ *  \a flag_R is set.  EACCES from a descent is ignored.
+ *
+ *  \a target is used as a work buffer (the entry name is appended and cut
+ *  off again) and \a work receives the lock file path.  The caller in
+ *  this file passes buffers of PATH_MAX + 1 bytes for both; the length
+ *  checks rely on that.
+ *
+ *  \param[in/out]	target		list parent directory (work buffer)
+ *  \param[in]		flag_R		-R flag
+ *  \param[in]		fstype		filesystem type
+ *  \param[out]		work		work buffer
+ *  \param[in/out]	count		counter
+ *
+ *  \retval		0		success
+ *  \retval		not 0		error code
+ */
+static int list_print_snapshot(char *target, int flag_R, __SWORD_TYPE fstype,
+			       char *work, int *count)
+{
+	DIR *dir_p;
+	struct dirent *de;
+	struct stat st;
+	struct statfs fs;
+	char *path, *path_end;
+	size_t base_len, name_len;
+	int flag_lock;
+	int is_snap;
+	int len;
+	int rc = 0;
+
+	/* open target directory */
+	dir_p = opendir(target);
+	if (dir_p == NULL) {
+		rc = -errno;
+		SNAP_ERR_WARN_LFS_OUT(rc, "list",
+				      __func__, __LINE__, target,
+				      true);
+		return rc;
+	}
+
+	path = target;
+	base_len = strlen(target);
+	path_end = target + base_len;
+	while ((de = readdir(dir_p)) != NULL) {
+
+		/* ignore ".", ".." , and not directory */
+		if ((strcmp(".", de->d_name) == 0 ||
+		     strcmp("..", de->d_name) == 0))
+			continue;
+		if (de->d_type != DT_DIR)
+			continue;
+
+		/* make path, refusing names that do not fit the buffer */
+		*path_end = '\0';
+		name_len = strlen(de->d_name);
+		if (base_len + name_len + 2 > PATH_MAX) {
+			fprintf(stderr, SNAP_ERR_LFS_DIRLONG, "list");
+			rc = -ENAMETOOLONG;
+			break;
+		}
+		path_end[0] = '/';
+		memcpy(path_end + 1, de->d_name, name_len + 1);
+
+		/*
+		 * check filesystem type; a skipped entry must not leave an
+		 * error behind in rc
+		 */
+		if (statfs(path, &fs) != 0 || fs.f_type != fstype)
+			continue;
+
+		/* print snapshot name */
+		if (strcmp(SNAPSHOT_DIR, de->d_name) == 0) {
+
+			/* check .l_snapshot */
+			is_snap = check_snapshot_dir(path);
+			if (is_snap < 0) {
+				/* warn and keep listing the other entries */
+				SNAP_ERR_WARN_LFS_OUT(is_snap, "list",
+						      __func__,
+						      __LINE__, path,
+						      true);
+				continue;
+			}
+			if (is_snap != 1) {
+				/* directory called recursively */
+				if (flag_R == 0)
+					continue;
+				rc = list_print_snapshot(path, flag_R,
+							fstype, work, count);
+				if (rc == -EACCES)
+					rc = 0;
+				else if (rc)
+					break;
+				continue;
+			}
+
+			/* snapshot lock status: "<path>" + ".lock" */
+			len = snprintf(work, PATH_MAX + 1, "%s%s", path,
+				       SNAPSHOT_LOCK_POSTFIX);
+			if (len < 0 || len > PATH_MAX) {
+				fprintf(stderr, SNAP_ERR_LFS_DIRLONG, "list");
+				rc = -ENAMETOOLONG;
+				break;
+			}
+			if (stat(work, &st) == 0) {
+				flag_lock = 1;
+			} else if (errno == ENOENT) {
+				flag_lock = 0;
+			} else {
+				rc = -errno;
+				SNAP_ERR_WARN_LFS_OUT(rc, "list",
+						      __func__,
+						      __LINE__, work,
+						      true);
+				break;
+			}
+
+			/* print entry */
+			rc = list_print_entry(path, count, flag_lock);
+			if (rc)
+				break;
+
+			continue;
+		}
+
+		/* directory called recursively */
+		if (flag_R == 0)
+			continue;
+		rc = list_print_snapshot(path, flag_R, fstype, work, count);
+		if (rc == -EACCES)
+			rc = 0;
+		else if (rc)
+			break;
+	}
+
+	/* give back path */
+	*path_end = '\0';
+	closedir(dir_p);
+
+	return rc;
+}
+
+/*
+ *  list_snapshot
+ *
+ *  Print the list header, then every snapshot found under \a d_val.
+ *  When the search succeeds without finding anything,
+ *  SNAP_MSG_SNAPNOEXIST is printed instead.
+ *
+ *  \param[in]	d_val		list parent directory
+ *  \param[in]	flag		-R flag
+ *
+ *  \retval     0		success
+ *  \retval     not 0		error code
+ */
+static int list_snapshot(char *d_val, int flag)
+{
+	char *cmd = "list";
+	char lock_path[PATH_MAX + 1];
+	struct statfs fsinfo;
+	int found = 0;
+	int rc;
+
+	/* the search is limited to the filesystem type of d_val */
+	if (statfs(d_val, &fsinfo) != 0) {
+		rc = -errno;
+		SNAP_ERR_LFS_OUT(rc, cmd, __func__, __LINE__, d_val);
+		return rc;
+	}
+
+	/* print list header */
+	fprintf(stdout, "%-16s %-31s %-19s %s\n",
+		"[Snapshot name]", "[Owner:Group]",
+		"[Create time]", "[Directory]");
+
+	/* print snapshots */
+	rc = list_print_snapshot(d_val, flag, fsinfo.f_type, lock_path,
+				 &found);
+	if (rc != 0)
+		return rc;
+
+	if (found == 0)
+		/* No snapshots exist. */
+		fprintf(stdout, SNAP_MSG_SNAPNOEXIST);
+	return 0;
+}
+
+/*
+ *  lfs_snapshot_list
+ *
+ *  lfs snapshot list command
+ *
+ *  Options: -d <directory> (default taken from get_snapshot_directory()),
+ *  -R (search sub-directories as well).
+ *
+ *  \param[in]	argc		number of arguments
+ *  \param[in]	argv		array of arguments
+ *
+ *  \retval     CMD_COMPLETE	success
+ *  \retval     1 or CMD_INCOMPLETE	error
+ *  \retval     CMD_HELP	usage
+ */
+int lfs_snapshot_list(int argc, char **argv)
+{
+	char *cmd = "list";
+	char val_d[PATH_MAX + 1];
+	int have_dir = 0, recursive = 0;
+	struct stat dir_st;
+	struct statfs dir_fs;
+	int opt;
+	int rc;
+
+	/* parse options; each one may be given only once */
+	while ((opt = getopt(argc, argv, "d:R")) != -1) {
+		if (opt == 'd') {
+			if (have_dir)
+				return CMD_HELP;
+			if (strlen(optarg) + 1 > PATH_MAX) {
+				fprintf(stderr, SNAP_ERR_LFS_DIRLONG, cmd);
+				return 1;
+			}
+			memcpy(val_d, optarg, strlen(optarg) + 1);
+			have_dir = 1;
+		} else if (opt == 'R') {
+			if (recursive)
+				return CMD_HELP;
+			recursive = 1;
+		} else {
+			return CMD_HELP;
+		}
+	}
+
+	/* no positional arguments are accepted */
+	if (optind < argc)
+		return CMD_HELP;
+
+	/* without -d, fall back to the current directory */
+	if (!have_dir && get_snapshot_directory(val_d, cmd))
+		return CMD_INCOMPLETE;
+
+	/* the directory must exist ... */
+	if (lstat(val_d, &dir_st) < 0) {
+		rc = -errno;
+		if (rc == -ENAMETOOLONG)
+			fprintf(stderr, SNAP_ERR_LFS_NAMELONG, cmd);
+		else
+			SNAP_ERR_LFS_OUT(rc, cmd,  __func__, __LINE__, val_d);
+		return 1;
+	}
+	/* ... and be a directory ... */
+	if (!S_ISDIR(dir_st.st_mode)) {
+		fprintf(stderr, SNAP_ERR_LFS_NOTDIR, cmd, val_d);
+		return 1;
+	}
+
+	/* ... on a Lustre filesystem (checked by MAGIC code) ... */
+	if (statfs(val_d, &dir_fs) != 0) {
+		SNAP_ERR_LFS_OUT(-errno, cmd,  __func__, __LINE__, val_d);
+		return 1;
+	}
+	if (dir_fs.f_type != LL_SUPER_MAGIC) {
+		fprintf(stderr, SNAP_ERR_LFS_NOTLUSTRE, cmd, val_d);
+		return 1;
+	}
+
+	/* ... with snapshots enabled */
+	rc = check_snapshot_enabled(val_d);
+	if (rc == 0) {
+		fprintf(stderr, SNAP_ERR_LFS_SNAPOFF, cmd);
+		return 1;
+	}
+	if (rc != 1) {
+		SNAP_ERR_LFS_OUT(rc, cmd,  __func__, __LINE__, val_d);
+		return 1;
+	}
+
+	/* print snapshot list */
+	return list_snapshot(val_d, recursive) ? CMD_INCOMPLETE : CMD_COMPLETE;
+}
diff --git a/lustre/utils/lfs_snapshot.h b/lustre/utils/lfs_snapshot.h
new file mode 100644
index 0000000..9ee9775
--- /dev/null
+++ b/lustre/utils/lfs_snapshot.h
@@ -0,0 +1,217 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License version 2 for more details.  A copy is
+ * included in the COPYING file that accompanied this code.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ *   Copyright(c) 2016-2017 FUJITSU LIMITED.
+ *   All rights reserved.
+ */
+#ifndef __LFS_SNAPSHOT_H
+#define __LFS_SNAPSHOT_H
+
+/* presumably the longest accepted fsname - TODO confirm against users */
+#define SNAPSHOT_FSNAME_MAX	8
+/* longest snapshot name accepted by the -s option, without the NUL */
+#define SNAPSHOT_SNAPNAME_MAX	16
+
+/* name of the per-directory directory that holds the snapshots */
+#define SNAPSHOT_DIR	".l_snapshot"
+/* lock file name; the postfix is what turns SNAPSHOT_DIR into it */
+#define SNAPSHOT_LOCK	".l_snapshot.lock"
+#define SNAPSHOT_LOCK_POSTFIX	".lock"
+
+/* NOTE(review): looks like retry pacing for unlock - users not shown here */
+#define SNAPSHOT_UNLOCK_SLEEP_SEC	1
+#define SNAPSHOT_UNLOCK_RETRY_MAX	10
+
+/* entry points of the "lfs snapshot" sub-commands */
+int lfs_snapshot_create(int argc, char **argv);
+int lfs_snapshot_delete(int argc, char **argv);
+int lfs_snapshot_list(int argc, char **argv);
+
+/*
+ * lfs command error message (manual)
+ *
+ * All of these are printf-style format strings.  Each one starts with
+ * SNAP_ERR_LFS, whose "%s" takes the sub-command name; a further "%s",
+ * where present, takes the path or name the message is about.
+ * SNAP_ERR_LFS_SYSTEM takes function name, line number and error code.
+ */
+#define SNAP_ERR_LFS		"error: snapshot: %s: "
+#define SNAP_ERR_LFS_SNAPOFF \
+	SNAP_ERR_LFS		"Snapshot is disabled.\n"
+#define SNAP_ERR_LFS_EACCES \
+	SNAP_ERR_LFS		"Permission denied - %s.\n"
+#define SNAP_ERR_LFS_OPEPRMIT \
+	SNAP_ERR_LFS		"Operation not permitted. - %s.\n"
+#define SNAP_ERR_LFS_CMDBUSY \
+	SNAP_ERR_LFS		"Snapshot command is busy. - %s.\n"
+#define SNAP_ERR_LFS_ENOMEM \
+	SNAP_ERR_LFS		"Cannot allocate memory.\n"
+#define SNAP_ERR_LFS_ENOSPC \
+	SNAP_ERR_LFS		"No disk space left\n"
+#define SNAP_ERR_LFS_NOTLUSTRE \
+	SNAP_ERR_LFS		"Not on Lustre - %s.\n"
+#define SNAP_ERR_LFS_NOTDIR \
+	SNAP_ERR_LFS		"Not a directory - %s.\n"
+#define SNAP_ERR_LFS_ALSNAP \
+	SNAP_ERR_LFS		"Snapshot already exists - %s.\n"
+#define SNAP_ERR_LFS_INVSDIR \
+	SNAP_ERR_LFS		"Invalid \"" SNAPSHOT_DIR "\" exists - %s.\n"
+#define SNAP_ERR_LFS_INVSNAP \
+	SNAP_ERR_LFS		"Invalid Snapshot Name.\n"
+#define SNAP_ERR_LFS_SNAPLONG \
+	SNAP_ERR_LFS		"Snapshot name is too long.\n"
+#define SNAP_ERR_LFS_NOTSNAPD						\
+	SNAP_ERR_LFS		"Snapshot does not exist - %s.\n"
+#define SNAP_ERR_LFS_NOENT \
+	SNAP_ERR_LFS		"%s does not exist.\n"
+#define SNAP_ERR_LFS_NOTOPE \
+	SNAP_ERR_LFS		"Operation not supported.\n"
+#define SNAP_ERR_LFS_EDQUOT \
+	SNAP_ERR_LFS		"Quota exceeded.\n"
+#define SNAP_ERR_LFS_ETIME \
+	SNAP_ERR_LFS		"Timer expired.\n"
+#define SNAP_ERR_LFS_SYSTEM \
+	SNAP_ERR_LFS		"An unexpected error has occurred. " \
+				"func=%s route=%d code=%d\n"
+#define SNAP_ERR_LFS_NOCDIR \
+	SNAP_ERR_LFS		"Current directory does not exist.\n"
+#define SNAP_ERR_LFS_DIRLONG \
+	SNAP_ERR_LFS		"Directory path is too long.\n"
+#define SNAP_ERR_LFS_NAMELONG \
+	SNAP_ERR_LFS		"Directory name is too long.\n"
+
+/*
+ * Print to stderr the lfs snapshot message that matches a negative errno
+ * value; codes without a dedicated message are reported through
+ * SNAP_ERR_LFS_SYSTEM together with func and route.
+ *
+ *   err          negative errno value; evaluated exactly once, so passing
+ *                "-errno" is safe even though fprintf() may change errno
+ *   cmd_name     sub-command name used as message prefix
+ *   func, route  function name and line number of the call site
+ *   str          path or name the message is about
+ *   warn_eacces  true: EACCES/EPERM is printed as a warning
+ *                (SNAP_WRN_LFS_EACCES) instead of an error
+ *
+ * The local snap_err_ must not be the name of a variable used in err.
+ */
+#define SNAP_ERR_WARN_LFS_OUT(err, cmd_name, func, route, str, warn_eacces)  \
+do {									\
+	int snap_err_ = (err);						\
+									\
+	if ((snap_err_ == -ENODEV) || (snap_err_ == -ENOENT)) {		\
+		fprintf(stderr, SNAP_ERR_LFS_NOENT, cmd_name, str);	\
+	} else if (snap_err_ == -ENOMEM) {				\
+		fprintf(stderr, SNAP_ERR_LFS_ENOMEM, cmd_name);		\
+	} else if ((snap_err_ == -EACCES) || (snap_err_ == -EPERM)) {	\
+		if (warn_eacces)					\
+			fprintf(stderr, SNAP_WRN_LFS_EACCES,		\
+				cmd_name, str);				\
+		else							\
+			fprintf(stderr, SNAP_ERR_LFS_EACCES,		\
+				cmd_name, str);				\
+	} else if (snap_err_ == -ENOTDIR) {				\
+		fprintf(stderr, SNAP_ERR_LFS_NOTDIR, cmd_name, str);	\
+	} else if (snap_err_ == -ENOSPC) {				\
+		fprintf(stderr, SNAP_ERR_LFS_ENOSPC, cmd_name);		\
+	} else if (snap_err_ == -EDQUOT) {				\
+		fprintf(stderr, SNAP_ERR_LFS_EDQUOT, cmd_name);		\
+	} else if (snap_err_ == -ETIME) {				\
+		fprintf(stderr, SNAP_ERR_LFS_ETIME, cmd_name);		\
+	} else if (snap_err_ == -ENAMETOOLONG) {			\
+		fprintf(stderr, SNAP_ERR_LFS_DIRLONG, cmd_name);	\
+	} else {							\
+		fprintf(stderr, SNAP_ERR_LFS_SYSTEM,			\
+			cmd_name, func, route, snap_err_);		\
+	}								\
+} while (0)
+
+/* Same as SNAP_ERR_WARN_LFS_OUT with EACCES/EPERM reported as an error. */
+#define SNAP_ERR_LFS_OUT(err, cmd_name, func, route, str) \
+	SNAP_ERR_WARN_LFS_OUT(err, cmd_name, func, route, str, false)
+
+/*
+ * lctl command error message (manual)
+ *
+ * printf-style format strings.  Each one starts with SNAP_ERR_LCTL, whose
+ * "%s" takes the command name; SNAP_ERR_LCTL_NOMDT0 and
+ * SNAP_ERR_LCTL_NODEV take one more string, SNAP_ERR_LCTL_SYSTEM takes
+ * function name, line number and error code.
+ */
+#define SNAP_ERR_LCTL		"error: %s: "
+#define SNAP_ERR_LCTL_NOMDT0 \
+	SNAP_ERR_LCTL		"%s-MDT0000 does not exist.\n"
+#define SNAP_ERR_LCTL_NODEV \
+	SNAP_ERR_LCTL		"%s does not exist.\n"
+#define SNAP_ERR_LCTL_FSLONG \
+	SNAP_ERR_LCTL		"Fsname is too long.\n"
+#define SNAP_ERR_LCTL_OPEPRMIT \
+	SNAP_ERR_LCTL		"Operation not permitted.\n"
+#define SNAP_ERR_LCTL_HSMON	\
+	SNAP_ERR_LCTL		"HSM is enabled.\n"
+#define SNAP_ERR_LCTL_SNAPON	\
+	SNAP_ERR_LCTL		"Snapshot is enabled.\n"
+#define SNAP_ERR_LCTL_ENOMEM \
+	SNAP_ERR_LCTL		"Cannot allocate memory.\n"
+#define SNAP_ERR_LCTL_EINVAL \
+	SNAP_ERR_LCTL		"Invalid fid.\n"
+#define SNAP_ERR_LCTL_ETIME \
+	SNAP_ERR_LCTL		"Timer expired.\n"
+#define SNAP_ERR_LCTL_CMDBUSY \
+	SNAP_ERR_LCTL		"Snapshot command is busy.\n"
+#define SNAP_ERR_LCTL_EEXIST \
+	SNAP_ERR_LCTL		"Cannot be deleted, " \
+				"it is being referenced from multiple files.\n"
+#define SNAP_ERR_LCTL_SYSTEM \
+	SNAP_ERR_LCTL		"An unexpected error has occurred. " \
+				"func=%s route=%d code=%d\n"
+
+/*
+ * Print to stderr the lctl snapshot message that matches a negative errno
+ * value; codes without a dedicated message are reported through
+ * SNAP_ERR_LCTL_SYSTEM together with func and route.
+ *
+ * err is evaluated exactly once, so passing "-errno" is safe even though
+ * fprintf() may change errno.  The local snap_err_ must not be the name
+ * of a variable used in err.
+ */
+#define SNAP_ERR_LCTL_OUT(err, cmd_name, func, route, str) \
+do {								\
+	int snap_err_ = (err);					\
+								\
+	if ((snap_err_ == -ENODEV) || (snap_err_ == -ENOENT)) {	\
+		fprintf(stderr, SNAP_ERR_LCTL_NODEV, cmd_name, str); \
+	} else if (snap_err_ == -ENOMEM) {			\
+		fprintf(stderr, SNAP_ERR_LCTL_ENOMEM, cmd_name); \
+	} else if (snap_err_ == -ETIME) {			\
+		fprintf(stderr, SNAP_ERR_LCTL_ETIME, cmd_name);	\
+	} else {						\
+		fprintf(stderr, SNAP_ERR_LCTL_SYSTEM,		\
+			cmd_name, func, route, snap_err_);	\
+	}							\
+} while (0)
+
+/*
+ * lfs command warning message (manual)
+ *
+ * printf-style format strings.  The "%s" of SNAP_WRN_LFS takes the
+ * sub-command name, the trailing "%s" the path concerned.
+ * SNAP_WRN_LFS_BITCLEARED takes the mode (printed in octal), then the
+ * directory and the entry name.
+ */
+#define SNAP_WRN_LFS		"warning: snapshot: %s: "
+#define SNAP_WRN_LFS_EACCES \
+	SNAP_WRN_LFS		"Permission denied. - %s\n"
+#define SNAP_WRN_LFS_FNOTSUP \
+	SNAP_WRN_LFS		"File type not supported. - %s\n"
+#define SNAP_WRN_LFS_ANOTFS \
+	SNAP_WRN_LFS		"A directory of another filesystem. - %s\n"
+#define SNAP_WRN_LFS_BITCLEARED \
+	SNAP_WRN_LFS            "SUID and/or SGID bits are cleared (%o)." \
+				" - %s/%s\n"
+
+/* command message (not written in the manual) */
+/* prompts passed to snapshot_cofirm(), which appends " [Y/y : N/n] " */
+#define SNAP_MSG_CONFIRM_DELETE	"Delete snapshot?"
+#define	SNAP_MSG_CONFIRM_ENABLE	\
+			"Snapshot can not be disabled after once enabled.\n" \
+			"Do you want to enable?"
+#define	SNAP_MSG_CONFIRM_ORPHANDELETE  "Delete orphan object?"
+
+/* SNAP_MSG_NOTOPENDIR takes the directory path; the others take nothing */
+#define SNAP_MSG_NOTOPENDIR	"Cannot open directory <%s> :" \
+				"Permission denied.\n"
+#define SNAP_MSG_SNAPNOEXIST	"No snapshots exist.\n"
+#define SNAP_MSG_ENABLED	"snapshot is enabled.\n"
+#define SNAP_MSG_DISABLED	"snapshot is disabled.\n"
+#define SNAP_MSG_HASENABLED	"snapshot has been enabled.\n"
+#define SNAP_MSG_ALENABLED	"snapshot status is already enabled.\n"
+
+/*
+ * Ask a yes/no question on stdout and read the answer from stdin.
+ *
+ * The prompt is repeated until a line holding exactly one of y, Y, n or N
+ * is entered.  End of input (or a read error) counts as "no", so the
+ * function cannot loop forever when stdin is closed or redirected.
+ *
+ * \param[in]	message		question text, printed before " [Y/y : N/n] "
+ *
+ * \retval	1	answer was yes
+ * \retval	0	answer was no, or stdin reached end of input
+ */
+static inline int snapshot_cofirm(char *message)
+{
+	while (1) {
+		/* int, not char: EOF must stay distinguishable from data */
+		int ans, next;
+
+		fprintf(stdout, "%s [Y/y : N/n] ", message);
+
+		/* input answer */
+		ans = getc(stdin);
+		if (ans == EOF)
+			return 0;
+
+		/* input cr only */
+		if (ans == '\n')
+			continue;
+
+		/* more than one character: discard the line and ask again */
+		next = getc(stdin);
+		while (next != EOF && next != '\n') {
+			ans = 0;
+			next = getc(stdin);
+		}
+		if (next == EOF)
+			return 0;
+
+		/* input yes or no */
+		if ((ans == 'y') || (ans == 'Y'))
+			return 1;
+		if ((ans == 'n') || (ans == 'N'))
+			return 0;
+	}
+}
+#endif  /* __LFS_SNAPSHOT_H */
diff --git a/lustre/utils/lustre_cfg.c b/lustre/utils/lustre_cfg.c
index 18df7d8..3ffefe1 100644
--- a/lustre/utils/lustre_cfg.c
+++ b/lustre/utils/lustre_cfg.c
@@ -67,6 +67,8 @@
 #include <libcfs/libcfsutil.h>
 #include <stdio.h>
 
+#include "lfs_snapshot.h"
+
 static char * lcfg_devname;
 
 int lcfg_set_devname(char *name)
@@ -1051,6 +1053,7 @@ static int setparam_display(struct param_opts *popt, char *pattern, char *value)
         int i;
         glob_t glob_info;
         char filename[PATH_MAX + 1];    /* extra 1 byte for file type */
+	int rc_snapshot;
 
         rc = glob(pattern, GLOB_BRACE, NULL, &glob_info);
         if (rc) {
@@ -1070,6 +1073,23 @@ static int setparam_display(struct param_opts *popt, char *pattern, char *value)
 			if (valuename)
 				printf("%s=%s\n", valuename, value);
 		}
+
+		/* snapshot check for hsm */
+		rc_snapshot = snapshot_check_and_lock_for_hsm(
+						glob_info.gl_pathv[i], value);
+		if (rc_snapshot == 1) {
+			/* snapshot is enabled. */
+			fprintf(stderr, SNAP_ERR_LCTL_SNAPON, "set_param");
+			rc = -EINVAL;
+			break;
+		} else if (rc_snapshot < 0) {
+			fprintf(stderr, "error: set_param: setting "
+				"%s=%s: %s\n", glob_info.gl_pathv[i],
+				value, strerror(errno));
+			rc = -EINVAL;
+			break;
+		}
+
 		/* Write the new value to the file */
 		fd = open(glob_info.gl_pathv[i], O_WRONLY);
 		if (fd >= 0) {
@@ -1085,6 +1105,10 @@ static int setparam_display(struct param_opts *popt, char *pattern, char *value)
 			fprintf(stderr, "error: set_param: %s opening %s\n",
 				strerror(rc = errno), glob_info.gl_pathv[i]);
 		}
+
+		/* snapshot check finished for hsm */
+		if (rc_snapshot == 0)
+			snapshot_unlock_for_hsm();
 	}
 
 	globfree(&glob_info);
diff --git a/lustre/utils/obd.c b/lustre/utils/obd.c
index 9bac523..ef68a32 100644
--- a/lustre/utils/obd.c
+++ b/lustre/utils/obd.c
@@ -74,6 +74,8 @@
 #include <libcfs/libcfsutil.h>
 #include <lustre/lustreapi.h>
 
+#include "lfs_snapshot.h"
+
 #define MAX_STRING_SIZE 128
 #define DEVICES_LIST "/proc/fs/lustre/devices"
 
@@ -4364,4 +4366,843 @@ int jt_changelog_deregister(int argc, char **argv)
         return 0;
 }
 
+/*
+ *  snapshot_unlock_for_hsm()
+ *
+ *  Send OBD_IOC_SNAPSHOT_UNLOCK to cur_device (lctl set_param, hsm check).
+ *
+ *  \retval    0		success
+ *  \retval    not 0		obd_ioctl_pack() error, or -errno of the ioctl
+ */
+int snapshot_unlock_for_hsm(void)
+{
+	char raw_buf[MAX_IOC_BUFLEN];
+	char *buf = raw_buf;
+	char *cmd_name = "set_param";	/* command name shown in messages */
+	struct obd_ioctl_data data;
+	int rc;
+
+	/* mdt0 unlock request: pack it, then send it */
+	memset(&data, 0, sizeof(data));
+	data.ioc_dev = cur_device;
+	data.ioc_command = OBD_IOC_SNAPSHOT_UNLOCK;
+	memset(buf, 0, sizeof(raw_buf));
+	rc = obd_ioctl_pack(&data, &buf, sizeof(raw_buf));
+	if (rc) {
+		fprintf(stderr, SNAP_ERR_LCTL_SYSTEM,
+			cmd_name, __func__, __LINE__, rc);
+		return rc;
+	}
+	rc = l_ioctl(OBD_DEV_ID, OBD_IOC_SNAPSHOT, buf);
+	if (rc < 0) {
+		rc = -errno;	/* report the ioctl failure as -errno */
+		fprintf(stderr, SNAP_ERR_LCTL_SYSTEM,
+			cmd_name, __func__, __LINE__, rc);
+		return rc;
+	}
+
+	return 0;
+}
+
+/*
+ *  snapshot_check_and_lock_for_hsm()
+ *
+ *  check snapshot status, lctl set_param command for hsm.  Acts only when
+ *  \a value is "enabled" and \a name ends in <fsname>-MDT0000/hsm_control.
+ *  \param[in]	name		parameter
+ *  \param[in]	value		value of parameter
+ *
+ *  \retval	0		snapshot is disabled, and locked
+ *  \retval	1		snapshot is enabled
+ *  \retval	2		check is unnecessary
+ *  \retval	negative	error (-1 or -errno)
+ */
+int snapshot_check_and_lock_for_hsm(char *name, char *value)
+{
+	char *ptr;
+	char *ptr1, *ptr2;
+	char *dev_name;
+	char name_buf[PATH_MAX + 1];
+	char *cmd_name = "set_param";
+	char raw_buf[MAX_IOC_BUFLEN];
+	char *buf = raw_buf;
+	struct obd_ioctl_data data;
+	int rc, len;
+
+	/* check value: only enabling hsm conflicts with snapshot */
+	if ((strcmp(value, "enabled") != 0))
+		return 2;
+
+	/* check name: keep the last (ptr1) and last but one (ptr2) component */
+	if (strlen(name) > PATH_MAX)
+		return -1;
+	strncpy(name_buf, name, sizeof(name_buf)); /* fits, so NUL terminated */
+	ptr = strtok(name_buf, "/");
+	ptr1 = NULL;
+	ptr2 = NULL;
+	while (ptr != NULL) {
+		ptr2 = ptr1;
+		ptr1 = ptr;
+		ptr = strtok(NULL, "/");
+	}
+	if (ptr1 == NULL || ptr2 == NULL)
+		return 2;
+	if (strcmp(ptr1, "hsm_control") != 0)
+		return 2;
+
+	/* check fsname */
+	len = strlen(ptr2) - strlen("-MDT0000");
+	if (len < 0 || len > SNAPSHOT_FSNAME_MAX)
+		return 2;
+
+	/* the component must end in -MDT0000; it names the device to select */
+	ptr = ptr2 + strlen(ptr2) - strlen("-MDT0000");
+	if (strcmp(ptr, "-MDT0000") != 0)
+		return 2;
+	dev_name = ptr2;
+	rc = do_device("device", dev_name);
+	if (rc)
+		return -1;
+
+	/* mdt0 lock request */
+	memset(&data, 0, sizeof(data));
+	data.ioc_dev = cur_device;
+	data.ioc_command = OBD_IOC_SNAPSHOT_LOCK;
+	memset(buf, 0, sizeof(raw_buf));
+	rc = obd_ioctl_pack(&data, &buf, sizeof(raw_buf));
+	if (rc) {
+		fprintf(stderr, SNAP_ERR_LCTL_SYSTEM,
+			cmd_name, __func__, __LINE__, rc);
+		return -1;	/* never let a pack error look like 1 or 2 */
+	}
+	rc = l_ioctl(OBD_DEV_ID, OBD_IOC_SNAPSHOT, buf);
+	if (rc < 0) {
+		rc = -errno;
+		fprintf(stderr, SNAP_ERR_LCTL_SYSTEM,
+			cmd_name, __func__, __LINE__, rc);
+		return rc;
+	}
+
+	/* check snapshot status send request; the lock is held from here on */
+	memset(&data, 0, sizeof(data));
+	data.ioc_dev = cur_device;
+	data.ioc_command = OBD_IOC_SNAPSHOT_STATUS;
+	memset(buf, 0, sizeof(raw_buf));
+	rc = obd_ioctl_pack(&data, &buf, sizeof(raw_buf));
+	if (rc) {
+		fprintf(stderr, SNAP_ERR_LCTL_SYSTEM,
+			cmd_name, __func__, __LINE__, rc);
+		snapshot_unlock_for_hsm();
+		return -1;	/* never let a pack error look like 1 or 2 */
+	}
+	rc = l_ioctl(OBD_DEV_ID, OBD_IOC_SNAPSHOT, buf);
+	if (rc < 0) {
+		rc = -errno;
+		fprintf(stderr, SNAP_ERR_LCTL_SYSTEM,
+			cmd_name, __func__, __LINE__, rc);
+		snapshot_unlock_for_hsm();
+		return rc;
+	}
+
+	/* check snapshot status: the ioctl answers 1 when it is enabled */
+	if (rc == 1) {
+		snapshot_unlock_for_hsm();
+		return rc;
+	}
+
+	return 0;	/* disabled: the caller unlocks after writing */
+}
+
+/*
+ *  snapshot_check_hsm()
+ *
+ *  check hsm status, lctl snapshot on command.  Reads the hsm_control
+ *  parameter file of <fs_name>-MDT0000 and compares it with "enabled";
+ *  a file that may not be opened (EACCES) is reported as disabled.
+ *
+ *  \param[in]	fs_name		filesystem name
+ *
+ *  \retval	0		hsm is disabled
+ *  \retval	1		hsm is enabled
+ *  \retval	-errno		error
+ */
+static int snapshot_check_hsm(char *fs_name)
+{
+	char param[PATH_MAX + 1];
+	char buf[32];
+	int fd;
+	int rc;
+
+	/* open parameter file */
+	snprintf(param, sizeof(param),
+		"/proc/fs/lustre/mdt/%s-MDT0000/hsm_control", fs_name);
+	fd = open(param, O_RDONLY);
+	if (fd < 0) {
+		/* an unreadable parameter file counts as "hsm disabled" */
+		if (errno == EACCES)
+			return 0;
+		return -errno;
+	}
+
+	/* read parameter file, leaving room for the terminating NUL */
+	rc = read(fd, buf, sizeof(buf) - 1);
+	if (rc < 0) {
+		rc = -errno;	/* close() below may overwrite errno */
+		close(fd);
+		return rc;
+	}
+
+	/* close parameter file */
+	close(fd);
+
+	/* check parameter: terminate it and drop one trailing newline */
+	buf[rc] = '\0';
+	if (rc > 0 && buf[rc - 1] == '\n')
+		buf[rc - 1] = '\0';
+	if (strcmp(buf, "enabled") != 0)
+		return 0;
+
+	return 1;
+}
+
+/*
+ *  snapshot_find_mdt0
+ *
+ *  find mdt0 device: an "UP" "mdt" entry of DEVICES_LIST named \a devname
+ *
+ *  \param[in]	devname		device name
+ *
+ *  \retval	1		found MDT0
+ *  \retval	0		not found MDT0 device
+ *  \retval	negative	-errno, the list could not be opened or read
+ */
+static int snapshot_find_mdt0(char *devname)
+{
+	FILE *fp;
+	char buf[MAX_STRING_SIZE];
+	char *p;
+	int ret = 0;
+
+	fp = fopen(DEVICES_LIST, "r");
+	if (fp == NULL)
+		return -errno;
+
+	while (1) {
+		p = fgets(buf, sizeof(buf), fp);
+		if (p == NULL) {
+			ret = ferror(fp) ? -errno : 0;	/* EOF: not found */
+			break;
+		}
+
+		while (*p == ' ')	/* blank skip */
+			p++;
+		p = strchr(p, ' ');	/* device no skip */
+		if (p == NULL)
+			continue;
+
+		while (*p == ' ')	/* blank skip */
+			p++;
+		if (strncmp(p, "UP ", sizeof("UP ") - 1) != 0)
+			continue;
+		p = strchr(p, ' ');	/* next blank */
+		if (p == NULL)
+			continue;
+
+		while (*p == ' ')	/* blank skip */
+			p++;
+		if (strncmp(p, "mdt ", sizeof("mdt ") - 1) != 0)
+			continue;
+		p = strchr(p, ' ');	/* next blank */
+		if (p == NULL)
+			continue;
+
+		while (*p == ' ')	/* blank skip */
+			p++;
+		if (strncmp(devname, p, strlen(devname)) == 0) {
+			ret = 1;	/* the entry name starts with devname */
+			break;
+		}
+	}
+
+	fclose(fp);
+
+	return ret;
+}
+
+/*
+ *  jt_snapshot_on
+ *
+ *  lctl snapshot on command: after confirmation, enables snapshot under
+ *  the snapshot lock unless it is enabled already or hsm is enabled
+ *  \param[in]	cmd_name	command name
+ *  \param[in]	fs_name		filesystem name
+ *
+ *  \retval	0		success, declined, or already enabled
+ *  \retval	not 0		error code (1 when hsm is enabled)
+ */
+static int jt_snapshot_on(char *cmd_name, char *fs_name)
+{
+	char raw_buf[MAX_IOC_BUFLEN];
+	char *buf = raw_buf;
+	struct obd_ioctl_data data;
+	int rc, rc_ul;	/* rc_ul: result of the final unlock request */
+
+	/* confirmation; answering "no" is not an error */
+	if (snapshot_cofirm(SNAP_MSG_CONFIRM_ENABLE) == 0)
+		return 0;
+
+	/* mdt0 lock request */
+	memset(&data, 0, sizeof(data));
+	data.ioc_dev = cur_device;
+	data.ioc_command = OBD_IOC_SNAPSHOT_LOCK;
+	memset(buf, 0, sizeof(raw_buf));
+	rc = obd_ioctl_pack(&data, &buf, sizeof(raw_buf));
+	if (rc) {
+		SNAP_ERR_LCTL_OUT(rc, cmd_name,
+				  __func__, __LINE__, "");
+		return rc;
+	}
+	rc = l_ioctl(OBD_DEV_ID, OBD_IOC_SNAPSHOT, buf);
+	if (rc < 0) {
+		rc = -errno;
+		if (rc == -EBUSY)	/* reported with the "busy" message */
+			fprintf(stderr, SNAP_ERR_LCTL_CMDBUSY, cmd_name);
+		else
+			SNAP_ERR_LCTL_OUT(rc, cmd_name,
+					  __func__, __LINE__, "");
+		return rc;
+	}
+
+	/* check snapshot status send request; failures now go to unlock */
+	memset(&data, 0, sizeof(data));
+	data.ioc_dev = cur_device;
+	data.ioc_command = OBD_IOC_SNAPSHOT_STATUS;
+	memset(buf, 0, sizeof(raw_buf));
+	rc = obd_ioctl_pack(&data, &buf, sizeof(raw_buf));
+	if (rc) {
+		SNAP_ERR_LCTL_OUT(rc, cmd_name,
+				  __func__, __LINE__, "");
+		goto unlock;
+	}
+	rc = l_ioctl(OBD_DEV_ID, OBD_IOC_SNAPSHOT, buf);
+	if (rc < 0) {
+		rc = -errno;
+		SNAP_ERR_LCTL_OUT(rc, cmd_name,
+				  __func__, __LINE__, "");
+		goto unlock;
+	}
+
+	/* check snapshot status: already enabled is reported as success */
+	if (rc == 1) {
+		rc = 0;
+		fprintf(stdout, SNAP_MSG_ALENABLED);
+		goto unlock;
+	}
+
+	/* check hsm status: snapshot is not enabled while hsm is enabled */
+	rc = snapshot_check_hsm(fs_name);
+	if (rc == 1) {
+		fprintf(stderr, SNAP_ERR_LCTL_HSMON, cmd_name);
+		goto unlock;
+	} else if (rc < 0) {
+		SNAP_ERR_LCTL_OUT(rc, cmd_name,
+				  __func__, __LINE__, "");
+		goto unlock;
+	}
+
+	/* snapshot on send request */
+	memset(&data, 0, sizeof(data));
+	data.ioc_dev = cur_device;
+	data.ioc_command = OBD_IOC_SNAPSHOT_ON;
+	memset(buf, 0, sizeof(raw_buf));
+	rc = obd_ioctl_pack(&data, &buf, sizeof(raw_buf));
+	if (rc) {
+		SNAP_ERR_LCTL_OUT(rc, cmd_name,
+				  __func__, __LINE__, "");
+		goto unlock;
+	}
+	rc = l_ioctl(OBD_DEV_ID, OBD_IOC_SNAPSHOT, buf);
+	if (rc < 0) {
+		rc = -errno;
+		SNAP_ERR_LCTL_OUT(rc, cmd_name,
+				  __func__, __LINE__, "");
+		goto unlock;
+	}
+
+	/* snapshot has been enabled. */
+	fprintf(stdout, SNAP_MSG_HASENABLED);
+
+unlock:
+	/* mdt0 unlock send request; an earlier error in rc takes precedence */
+	memset(&data, 0, sizeof(data));
+	data.ioc_dev = cur_device;
+	data.ioc_command = OBD_IOC_SNAPSHOT_UNLOCK;
+	memset(buf, 0, sizeof(raw_buf));
+	rc_ul = obd_ioctl_pack(&data, &buf, sizeof(raw_buf));
+	if (rc_ul) {
+		SNAP_ERR_LCTL_OUT(rc_ul, cmd_name,
+				  __func__, __LINE__, "");
+		return rc ? rc : rc_ul;
+	}
+	rc_ul = l_ioctl(OBD_DEV_ID, OBD_IOC_SNAPSHOT, buf);
+	if (rc_ul < 0) {
+		rc_ul = -errno;
+		SNAP_ERR_LCTL_OUT(rc_ul, cmd_name,
+				  __func__, __LINE__, "");
+		return rc ? rc : rc_ul;
+	}
+
+	return rc;
+}
+
+/*
+ *  jt_snapshot_status
+ *
+ *  lctl snapshot status command: prints whether snapshot is enabled
+ *
+ *  \param[in]	cmd_name	command name
+ *  \param[in]	fs_name		filesystem name (not used here)
+ *
+ *  \retval	0		success
+ *  \retval	not 0		error code
+ */
+static int jt_snapshot_status(char *cmd_name, char *fs_name)
+{
+	char raw_buf[MAX_IOC_BUFLEN];
+	char *buf = raw_buf;
+	struct obd_ioctl_data data;
+	int rc;
+
+	/* prepare request data */
+	memset(&data, 0, sizeof(data));
+	data.ioc_dev = cur_device;
+	data.ioc_command = OBD_IOC_SNAPSHOT_STATUS;
+	memset(buf, 0, sizeof(raw_buf));
+	rc = obd_ioctl_pack(&data, &buf, sizeof(raw_buf));
+	if (rc) {
+		SNAP_ERR_LCTL_OUT(rc, cmd_name,
+			__func__, __LINE__, "");
+		return rc;
+	}
+
+	/* send request */
+	rc = l_ioctl(OBD_DEV_ID, OBD_IOC_SNAPSHOT, buf);
+	if (rc < 0) {
+		rc = -errno;
+		SNAP_ERR_LCTL_OUT(rc, cmd_name,
+			__func__, __LINE__, "");
+		return rc;
+	}
+
+	/* print status: 1 means enabled, any other answer disabled */
+	if (rc == 1)
+		/* snapshot is enabled. */
+		fprintf(stdout, SNAP_MSG_ENABLED);
+	else
+		/* snapshot is disabled. */
+		fprintf(stdout, SNAP_MSG_DISABLED);
+
+	return 0;
+}
+
+/*
+ *  jt_snapshot_unlock
+ *
+ *  lctl snapshot unlock command: sends OBD_IOC_SNAPSHOT_UNLOCK to cur_device
+ *
+ *  \param[in]	cmd_name	command name
+ *  \param[in]	fs_name		filesystem name (not used here)
+ *
+ *  \retval	0		success
+ *  \retval	not 0		error code
+ */
+static int jt_snapshot_unlock(char *cmd_name, char *fs_name)
+{
+	char raw_buf[MAX_IOC_BUFLEN];
+	char *buf = raw_buf;
+	struct obd_ioctl_data data;
+	int rc;
+
+	/* prepare request data */
+	memset(&data, 0, sizeof(data));
+	data.ioc_dev = cur_device;
+	data.ioc_command = OBD_IOC_SNAPSHOT_UNLOCK;
+	memset(buf, 0, sizeof(raw_buf));
+	rc = obd_ioctl_pack(&data, &buf, sizeof(raw_buf));
+	if (rc) {
+		SNAP_ERR_LCTL_OUT(rc, cmd_name,
+			__func__, __LINE__, "");
+		return rc;
+	}
+
+	/* send request */
+	rc = l_ioctl(OBD_DEV_ID, OBD_IOC_SNAPSHOT, buf);
+	if (rc < 0) {
+		rc = -errno;	/* report the ioctl failure as -errno */
+		SNAP_ERR_LCTL_OUT(rc, cmd_name,
+			__func__, __LINE__, "");
+		return rc;
+	}
+
+	return 0;
+}
+
+/*
+ *  jt_snapshot
+ *
+ *  lctl snapshot command
+ *
+ *  Usage: <command> <on|status|debug_unlock> <fsname>
+ *
+ *  Only root may run it.  The device <fsname>-MDT0000 is looked up in the
+ *  device list and made the current device before the sub command runs:
+ *    on            enable snapshot
+ *    status        print whether snapshot is enabled
+ *    debug_unlock  send a snapshot unlock request
+ *
+ *  \param[in]	argc		number of arguments
+ *  \param[in]	argv		array of arguments
+ *
+ *  \retval	CMD_COMPLETE	success
+ *  \retval	CMD_HELP	wrong usage
+ *  \retval	CMD_INCOMPLETE	error
+ */
+int jt_snapshot(int argc, char **argv)
+{
+	/* room for "<fsname>-MDT0000": 8 byte suffix and the final NUL */
+	char mdt0_name[SNAPSHOT_FSNAME_MAX + 8 + 1];
+	int (*subcmd)(char *, char *);
+	char *prog, *action, *fsname;
+
+	if (argc != 3)
+		return CMD_HELP;
+
+	prog = jt_cmdname(argv[0]);
+	action = argv[1];
+	fsname = argv[2];
+
+	/* the command is reserved to root */
+	if (geteuid() != 0) {
+		fprintf(stderr, SNAP_ERR_LCTL_OPEPRMIT, prog);
+		return CMD_INCOMPLETE;
+	}
+
+	/* derive the MDT0 device name from the filesystem name */
+	if (strlen(fsname) > SNAPSHOT_FSNAME_MAX) {
+		fprintf(stderr, SNAP_ERR_LCTL_FSLONG, prog);
+		return CMD_INCOMPLETE;
+	}
+	snprintf(mdt0_name, sizeof(mdt0_name), "%s-MDT0000", fsname);
+
+	/* MDT0 has to be listed and accepted as the current device */
+	if (snapshot_find_mdt0(mdt0_name) != 1 ||
+	    do_device("device", mdt0_name) != 0) {
+		fprintf(stderr, SNAP_ERR_LCTL_NOMDT0, prog, fsname);
+		return CMD_INCOMPLETE;
+	}
+
+	/* select the handler of the sub command */
+	if (strcmp(action, "on") == 0)
+		subcmd = jt_snapshot_on;
+	else if (strcmp(action, "status") == 0)
+		subcmd = jt_snapshot_status;
+	else if (strcmp(action, "debug_unlock") == 0)
+		subcmd = jt_snapshot_unlock;
+	else
+		return CMD_HELP;
+
+	return subcmd(prog, fsname) ? CMD_INCOMPLETE : CMD_COMPLETE;
+}
+
+/* Send the orphan delete request prepared in \a data for \a fid, after a
+ * confirmation prompt unless \a flag_f is set; returns a CMD_* code.
+ */
+static int jt_snapshot_orphan_del(char *cmd_name, struct obd_ioctl_data *data,
+				  struct lu_fid *fid,
+				  int flag_f)
+{
+	char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
+	int rc;
+
+	if (!flag_f &&
+	    snapshot_cofirm(SNAP_MSG_CONFIRM_ORPHANDELETE) == 0)
+		return CMD_COMPLETE;	/* declined: nothing to do */
+
+	data->ioc_inlbuf1 = (char *)fid;
+	data->ioc_inllen1 = sizeof(struct lu_fid);
+
+	memset(buf, 0, sizeof(rawbuf));
+	rc = obd_ioctl_pack(data, &buf, sizeof(rawbuf));
+	if (rc) {
+		SNAP_ERR_LCTL_OUT(rc, cmd_name,
+				  __func__, __LINE__, "");
+		return CMD_INCOMPLETE;
+	}
+	rc = l_ioctl(OBD_DEV_ID, OBD_IOC_SNAPSHOT_ORPHAN, buf);
+	if (rc) {
+		rc = -errno;
+		if (rc == -EEXIST)
+			fprintf(stderr, SNAP_ERR_LCTL_EEXIST, cmd_name);
+		else if (rc == -EBFONT)	/* shown as an invalid argument */
+			fprintf(stderr, SNAP_ERR_LCTL_EINVAL, cmd_name);
+		else
+			SNAP_ERR_LCTL_OUT(rc, cmd_name,
+					  __func__, __LINE__, "OST object");
+		return CMD_INCOMPLETE;
+	}
+	return CMD_COMPLETE;
+}
+
+/* Print the orphan inode found for \a fid and the inodes referring to it. */
+static int jt_snapshot_orphan_inode(char *cmd_name, struct obd_ioctl_data *data,
+				    struct lu_fid *fid)
+{
+	char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
+	struct snapshot_list_buf list_buf;
+	struct snapshot_list_buf *rep_list_buf = NULL;
+	int rc, i;
+
+	memset(&list_buf, 0, sizeof(list_buf));
+
+	data->ioc_inlbuf1 = (char *)fid;	/* request: the FID */
+	data->ioc_inllen1 = sizeof(struct lu_fid);
+	data->ioc_pbuf1 = (char *)&list_buf;	/* reply: the inode list */
+	data->ioc_plen1 = sizeof(list_buf);
+
+	memset(buf, 0, sizeof(rawbuf));
+	rc = obd_ioctl_pack(data, &buf, MAX_IOC_BUFLEN);
+	if (rc) {
+		SNAP_ERR_LCTL_OUT(rc, cmd_name,
+				  __func__, __LINE__, "");
+		return CMD_INCOMPLETE;
+	}
+	rc = l_ioctl(OBD_DEV_ID, OBD_IOC_SNAPSHOT_ORPHAN, buf);
+	if (rc) {
+		rc = -errno;	/* -EAGAIN is kept: the list is still printed */
+		if (rc == -EBFONT) {
+			fprintf(stderr, SNAP_ERR_LCTL_EINVAL, cmd_name);
+			return CMD_INCOMPLETE;
+		} else if (rc != -EAGAIN) {
+			SNAP_ERR_LCTL_OUT(rc, cmd_name,
+					  __func__, __LINE__, "OST object");
+			return CMD_INCOMPLETE;
+
+		}
+	}
+	obd_ioctl_unpack(data, buf, MAX_IOC_BUFLEN);
+	rep_list_buf = (struct snapshot_list_buf *)(data->ioc_pbuf1);
+
+	fprintf(stdout, "orphan_inode: file_fid=<none>, "
+		"OST_ino=%lu\n",
+		rep_list_buf->list_data[0].ost_ino);
+
+	for (i = 1; i < rep_list_buf->list_num; i++) {
+		if (rep_list_buf->list_data[i].fid.f_seq != 0)
+			fprintf(stdout, "ref_inode%03d: "
+				"file_fid="DFID", "
+				"OST_ino=%lu\n",
+				i, PFID(&(rep_list_buf->list_data[i].fid)),
+				rep_list_buf->list_data[i].ost_ino);
+		else if (rep_list_buf->list_data[i].fid.f_oid == 0) {
+			/* orphan inode: f_seq and f_oid are both 0 */
+			fprintf(stdout, "ref_inode%03d: "
+				"file_fid=<none>, "
+				"OST_ino=%lu\n",
+				i, rep_list_buf->list_data[i].ost_ino);
+		} else if (rep_list_buf->list_data[i].ost_ino == 0) {
+			SNAP_ERR_LCTL_OUT(-(rep_list_buf->
+						list_data[i].fid.f_oid),
+						cmd_name,
+						__func__, __LINE__, "");
+		} else {
+			/* err inode: f_oid is printed as the error number */
+			fprintf(stdout, "ref_inode%03d: "
+				"file_fid=<cannot get fid err=%d>, "
+				"OST_ino=%lu\n",
+				i, -(rep_list_buf->list_data[i].fid.f_oid),
+				rep_list_buf->list_data[i].ost_ino);
+		}
+	}
+	if (rc == -EAGAIN) {
+		fprintf(stdout, "\n"
+			"There are more entries. Delete some entries and try again.\n");
+		return CMD_COMPLETE;
+	}
+	if (rep_list_buf->list_num == 1)	/* only entry 0, the orphan */
+		fprintf(stdout, "\n"
+			"There is no snapshot referring to this.\n"
+			"You can delete this with \"--delete\" option.\n");
+	return CMD_COMPLETE;
+}
+
+/* Print FID (or the error met reading it) and OST inode of each list entry. */
+static int jt_snapshot_list_orphan(char *cmd_name, struct obd_ioctl_data *data)
+{
+	char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
+	struct snapshot_list_buf list_buf;
+	struct snapshot_list_buf *rep_list_buf = NULL;
+	int rc = 0, i;
+
+	memset(&list_buf, 0, sizeof(list_buf));
+	data->ioc_pbuf1 = (char *)&list_buf;	/* reply: the orphan list */
+	data->ioc_plen1 = sizeof(list_buf);
+
+	memset(buf, 0, sizeof(rawbuf));
+	rc = obd_ioctl_pack(data, &buf, sizeof(rawbuf));
+	if (rc) {
+		SNAP_ERR_LCTL_OUT(rc, cmd_name,
+				  __func__, __LINE__, "");
+		return CMD_INCOMPLETE;
+	}
+	rc = l_ioctl(OBD_DEV_ID, OBD_IOC_SNAPSHOT_ORPHAN, buf);
+	if (rc) {
+		rc = -errno;	/* -EAGAIN is kept: the list is still printed */
+		if (rc == -EBFONT) {
+			fprintf(stderr, SNAP_ERR_LCTL_EINVAL, cmd_name);
+			return CMD_INCOMPLETE;
+		} else if (rc != -EAGAIN) {
+			SNAP_ERR_LCTL_OUT(rc, cmd_name,
+					  __func__, __LINE__, "OST object");
+			return CMD_INCOMPLETE;
+		}
+	}
+	obd_ioctl_unpack(data, buf, MAX_IOC_BUFLEN);
+
+	rep_list_buf = (struct snapshot_list_buf *)(data->ioc_pbuf1);
+
+	for (i = 0; i < rep_list_buf->list_num; i++) {
+		/* f_seq == 0: no FID, f_oid is printed as the error number */
+		if (rep_list_buf->list_data[i].fid.f_seq != 0)
+			fprintf(stdout, "OST_fid="DFID", "
+				"OST_ino=%lu\n",
+				PFID(&(rep_list_buf->list_data[i].fid)),
+				rep_list_buf->list_data[i].ost_ino);
+		else
+			fprintf(stdout, "OST_fid=<cannot get fid err=%d>, "
+				"OST_ino=%lu\n",
+				-(rep_list_buf->list_data[i].fid.f_oid),
+				rep_list_buf->list_data[i].ost_ino);
+	}
+	if (rc == -EAGAIN) {
+		fprintf(stdout, "\n"
+			"There are more entries. Delete some entries and try again.\n");
+	}
+
+	return CMD_COMPLETE;
+}
+
+/*
+ *  jt_snapshot_orphan
+ *
+ *  lctl command handling orphan objects; one action has to be selected:
+ *    -l, --list <device>			list orphan objects
+ *    -i, --fid <fid> <device>			show one orphan object
+ *    -d, --delete [-f] <fid> <device>		delete one orphan object,
+ *						-f skips the confirmation
+ *  Leading '[' characters of <fid> are skipped.  Only root may run the
+ *  command; <device> becomes the current device.
+ *
+ *  \param[in]	argc		number of arguments
+ *  \param[in]	argv		array of arguments
+ *
+ *  \retval	CMD_COMPLETE	success
+ *  \retval	other		CMD_HELP (wrong usage) or CMD_INCOMPLETE (error)
+ */
+int jt_snapshot_orphan(int argc, char **argv)
+{
+	static struct option long_options[] = {
+		{ .name = "fid",    .has_arg = required_argument, .val = 'i' },
+		{ .name = "delete", .has_arg = no_argument,       .val = 'd' },
+		{ .name = "list",   .has_arg = no_argument,       .val = 'l' },
+		{ .name = NULL }
+	};
+	char *cmd_name = jt_cmdname(argv[0]);
+	char *dev_name, *fidstr = NULL;
+	int c;
+	int flag_f = 0;
+	int sub_cmd = -1;
+	struct lu_fid fid;
+	struct obd_ioctl_data data;
+
+	if (argc < 3)
+		return CMD_HELP;
+
+	/* lctl may run many commands per process: restart the option scan */
+	optind = 0;
+	while ((c = getopt_long(argc, argv, "i:dlf",
+				long_options, NULL)) != -1) {
+		switch (c) {
+		case 'i':
+			fidstr = optarg;
+			if (sub_cmd != -1 || argc != 4 || !fidstr)
+				return CMD_HELP;
+			sub_cmd = OBD_IOC_SNAPSHOT_ORPHAN_INODE;
+			break;
+		case 'd':
+			if (sub_cmd != -1)
+				return CMD_HELP;
+			sub_cmd = OBD_IOC_SNAPSHOT_ORPHAN_DEL;
+			break;
+		case 'l':
+			if (sub_cmd != -1 || argc != 3)
+				return CMD_HELP;
+			sub_cmd = OBD_IOC_SNAPSHOT_LIST_ORPHAN;
+			break;
+		case 'f':
+			if (flag_f)
+				return CMD_HELP;
+			flag_f = 1;
+			break;
+		default:
+			return CMD_HELP;
+		}
+	}
+
+	/* a sub command and at least the device operand are mandatory */
+	if (sub_cmd == -1 || optind == argc)
+		return CMD_HELP;
+	if (flag_f && sub_cmd != OBD_IOC_SNAPSHOT_ORPHAN_DEL)
+		return CMD_HELP;
+
+	if (sub_cmd == OBD_IOC_SNAPSHOT_ORPHAN_DEL) {
+		/* exactly two operands must be left: <fid> <device> */
+		if (argc != (flag_f ? 5 : 4) || argc - optind != 2)
+			return CMD_HELP;
+		fidstr = argv[optind++];
+	}
+	dev_name = argv[optind];
+
+	if (fidstr) {
+		while (*fidstr == '[')
+			fidstr++;
+		if (sscanf(fidstr, SFID, RFID(&fid)) != 3) {
+			fprintf(stderr, SNAP_ERR_LCTL_EINVAL, cmd_name);
+			return CMD_INCOMPLETE;
+		}
+	}
+
+	/* check root user */
+	if (geteuid() != 0) {
+		fprintf(stderr, SNAP_ERR_LCTL_OPEPRMIT, cmd_name);
+		return CMD_INCOMPLETE;
+	}
+
+	if (do_device("device", dev_name) != 0) {
+		fprintf(stderr, SNAP_ERR_LCTL_NODEV, cmd_name, dev_name);
+		return CMD_INCOMPLETE;
+	}
+
+	memset(&data, 0, sizeof(data));
+	data.ioc_dev = cur_device;
+	data.ioc_command = sub_cmd;
+	switch (sub_cmd) {
+	case OBD_IOC_SNAPSHOT_ORPHAN_INODE:
+		return jt_snapshot_orphan_inode(cmd_name, &data, &fid);
+	case OBD_IOC_SNAPSHOT_ORPHAN_DEL:
+		return jt_snapshot_orphan_del(cmd_name, &data, &fid, flag_f);
+	case OBD_IOC_SNAPSHOT_LIST_ORPHAN:
+		return jt_snapshot_list_orphan(cmd_name, &data);
+	default:
+		return CMD_HELP;
+	}
+}
 
diff --git a/lustre/utils/obdctl.h b/lustre/utils/obdctl.h
index 3e67e8b..f8566ff 100644
--- a/lustre/utils/obdctl.h
+++ b/lustre/utils/obdctl.h
@@ -92,12 +92,18 @@ int jt_llog_cancel(int argc, char **argv);
 int jt_llog_remove(int argc, char **argv);
 int jt_llog_check(int argc, char **argv);
 
+int jt_snapshot(int argc, char **argv);
+int jt_snapshot_orphan(int argc, char **argv);
+
 struct lustre_cfg;
 int lcfg_ioctl(char * func, int dev_id, struct lustre_cfg *lcfg);
 int lcfg_mgs_ioctl(char *func, int dev_id, struct lustre_cfg *lcfg);
 int parse_devname(char *func, char *name);
 char *jt_cmdname(char *func);
 
+int snapshot_check_and_lock_for_hsm(char *name, char *value);
+int snapshot_unlock_for_hsm(void);
+
 
 /* lustre_cfg.c */
 int lcfg_set_devname(char *name);
diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c
index 3537bcf..bebdd33 100644
--- a/lustre/utils/wiretest.c
+++ b/lustre/utils/wiretest.c
@@ -116,7 +116,9 @@ void lustre_assert_wire_constants(void)
 		 (long long)OST_QUOTACTL);
 	LASSERTF(OST_QUOTA_ADJUST_QUNIT == 20, "found %lld\n",
 		 (long long)OST_QUOTA_ADJUST_QUNIT);
-	LASSERTF(OST_LAST_OPC == 21, "found %lld\n",
+	LASSERTF(OST_SNAPSHOT == 32, "found %lld\n",
+		 (long long)OST_SNAPSHOT);
+	LASSERTF(OST_LAST_OPC == 33, "found %lld\n",
 		 (long long)OST_LAST_OPC);
 	LASSERTF(OBD_OBJECT_EOF == 0xffffffffffffffffULL, "found 0x%.16llxULL\n",
 		 OBD_OBJECT_EOF);
@@ -208,7 +210,9 @@ void lustre_assert_wire_constants(void)
 		 (long long)REINT_RMENTRY);
 	LASSERTF(REINT_MIGRATE == 9, "found %lld\n",
 		 (long long)REINT_MIGRATE);
-	LASSERTF(REINT_MAX == 10, "found %lld\n",
+	LASSERTF(REINT_SNAPSHOT == 21, "found %lld\n",
+		 (long long)REINT_SNAPSHOT);
+	LASSERTF(REINT_MAX == 22, "found %lld\n",
 		 (long long)REINT_MAX);
 	LASSERTF(DISP_IT_EXECD == 0x00000001UL, "found 0x%.8xUL\n",
 		(unsigned)DISP_IT_EXECD);