diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index acd9264..b6287b6 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -1115,6 +1115,25 @@ out: static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which); +/* ra_io_arg will be filled in the beginning of ll_readahead with + * ras_lock, then the following ll_read_ahead_pages will read RA + * pages according to this arg, all the items in this structure are + * counted by page index. + */ +struct ra_io_arg { + unsigned long ria_start; /* start offset of read-ahead*/ + unsigned long ria_end; /* end offset of read-ahead*/ + /* If stride read pattern is detected, ria_stoff means where + * stride read is started. Note: for normal read-ahead, the + * value here is meaningless, and also it will not be accessed*/ + pgoff_t ria_stoff; + /* ria_length and ria_pages are the length and pages length in the + * stride I/O mode. And they will also be used to check whether + * it is stride I/O read-ahead in the read-ahead pages*/ + unsigned long ria_length; + unsigned long ria_pages; +}; + /* WARNING: This algorithm is used to reduce the contention on * sbi->ll_lock. It should work well if the ra_max_pages is much * greater than the single file's read-ahead window. @@ -1125,7 +1144,8 @@ static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which); * ll_ra_count_get at the exactly same time. All of them will get a zero ra * window, although the global window is 100M. 
-jay */ -static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, unsigned long len) +static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, struct ra_io_arg *ria, + unsigned long len) { struct ll_ra_info *ra = &sbi->ll_ra_info; unsigned long ret = 0; @@ -1140,6 +1160,13 @@ static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, unsigned long len) if ((int)ret < min((unsigned long)PTLRPC_MAX_BRW_PAGES, len)) GOTO(out, ret = 0); + if (ria->ria_pages == 0) { + if (ret >= ((ria->ria_start + ret) % PTLRPC_MAX_BRW_PAGES)) + ret -= (ria->ria_start + ret) % PTLRPC_MAX_BRW_PAGES; + else + GOTO(out, ret = 0); + } + if (atomic_add_return(ret, &ra->ra_cur_pages) > ra->ra_max_pages) { atomic_sub(ret, &ra->ra_cur_pages); ret = 0; @@ -1546,25 +1573,6 @@ unlock_page: return rc; } -/* ra_io_arg will be filled in the beginning of ll_readahead with - * ras_lock, then the following ll_read_ahead_pages will read RA - * pages according to this arg, all the items in this structure are - * counted by page index. - */ -struct ra_io_arg { - unsigned long ria_start; /* start offset of read-ahead*/ - unsigned long ria_end; /* end offset of read-ahead*/ - /* If stride read pattern is detected, ria_stoff means where - * stride read is started. Note: for normal read-ahead, the - * value here is meaningless, and also it will not be accessed*/ - pgoff_t ria_stoff; - /* ria_length and ria_pages are the length and pages length in the - * stride I/O mode. 
And they will also be used to check whether - * it is stride I/O read-ahead in the read-ahead pages*/ - unsigned long ria_length; - unsigned long ria_pages; -}; - #define RIA_DEBUG(ria) \ CDEBUG(D_READA, "rs %lu re %lu ro %lu rl %lu rp %lu\n", \ ria->ria_start, ria->ria_end, ria->ria_stoff, ria->ria_length,\ @@ -1832,7 +1840,7 @@ static int ll_readahead(struct ll_readahead_state *ras, if (len == 0) RETURN(0); - reserved = ll_ra_count_get(ll_i2sbi(inode), len); + reserved = ll_ra_count_get(ll_i2sbi(inode), &ria, len); if (reserved < len) ll_ra_stats_inc(mapping, RA_STAT_MAX_IN_FLIGHT);