diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h index 978b10c..b3fe523 100644 --- a/lustre/include/lprocfs_status.h +++ b/lustre/include/lprocfs_status.h @@ -85,10 +85,11 @@ struct lprocfs_seq_vars { }; /* if we find more consumers this could be generalized */ -#define OBD_HIST_MAX 32 struct obd_histogram { spinlock_t oh_lock; - unsigned long oh_buckets[OBD_HIST_MAX]; + unsigned oh_bucket_count; + bool oh_replaced; + unsigned long oh_buckets[0]; }; enum { @@ -110,7 +111,7 @@ enum { }; struct brw_stats { - struct obd_histogram hist[BRW_LAST]; + struct obd_histogram *brw_hist[BRW_LAST]; }; enum { @@ -121,7 +122,7 @@ enum { }; struct rename_stats { - struct obd_histogram hist[RENAME_LAST]; + struct obd_histogram *ren_hist[RENAME_LAST]; }; /* An lprocfs counter can be configured using the enum bit masks below. @@ -808,8 +809,10 @@ extern int lprocfs_write_frac_u64_helper(const char *buffer, __u64 *val, int mult); char *lprocfs_find_named_value(const char *buffer, const char *name, size_t *count); -void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value); -void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value); +int lprocfs_oh_init(struct obd_histogram **oh, int bucket_count); +void lprocfs_oh_fini(struct obd_histogram **oh); +void lprocfs_oh_tally(struct obd_histogram **oh, unsigned int value); +void lprocfs_oh_tally_log2(struct obd_histogram **oh, unsigned int value); void lprocfs_oh_clear(struct obd_histogram *oh); unsigned long lprocfs_oh_sum(struct obd_histogram *oh); @@ -1326,11 +1329,15 @@ int lprocfs_filestotal_seq_show(struct seq_file *m, void *data) static inline int lprocfs_filesfree_seq_show(struct seq_file *m, void *data) { return 0; } -static inline -void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value) +static inline int lprocfs_oh_init(struct obd_histogram **oh, int bucket_count) +{ return 0; } +static inline void lprocfs_oh_fini(struct obd_histogram **oh) { return; } -static 
inline -void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value) +static inline void lprocfs_oh_tally(struct obd_histogram **oh, + unsigned int value) +{ return; } +static inline void lprocfs_oh_tally_log2(struct obd_histogram **oh, + unsigned int value) { return; } static inline void lprocfs_oh_clear(struct obd_histogram *oh) diff --git a/lustre/include/lustre_lib.h b/lustre/include/lustre_lib.h index 2ea9e15..84889e1 100644 --- a/lustre/include/lustre_lib.h +++ b/lustre/include/lustre_lib.h @@ -700,7 +700,7 @@ struct l_wait_info { #define LWI_INTR(cb, data) LWI_TIMEOUT_INTR(0, NULL, cb, data) #ifdef __KERNEL__ - +#define log2(n) ffz(~(n)) /* * wait for @condition to become true, but no longer than timeout, specified * by @info. diff --git a/lustre/include/obd.h b/lustre/include/obd.h index b9c2433..e7d2d36 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -328,12 +328,12 @@ struct client_obd { atomic_t cl_pending_r_pages; __u32 cl_max_pages_per_rpc; int cl_max_rpcs_in_flight; - struct obd_histogram cl_read_rpc_hist; - struct obd_histogram cl_write_rpc_hist; - struct obd_histogram cl_read_page_hist; - struct obd_histogram cl_write_page_hist; - struct obd_histogram cl_read_offset_hist; - struct obd_histogram cl_write_offset_hist; + struct obd_histogram *cl_read_rpc_hist; + struct obd_histogram *cl_write_rpc_hist; + struct obd_histogram *cl_read_page_hist; + struct obd_histogram *cl_write_page_hist; + struct obd_histogram *cl_read_offset_hist; + struct obd_histogram *cl_write_offset_hist; /* lru for osc caching pages */ struct cl_client_cache *cl_cache; diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index b4c24fc..72425e2 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -363,12 +363,12 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) cli->cl_r_in_flight = 0; cli->cl_w_in_flight = 0; - spin_lock_init(&cli->cl_read_rpc_hist.oh_lock); - 
spin_lock_init(&cli->cl_write_rpc_hist.oh_lock); - spin_lock_init(&cli->cl_read_page_hist.oh_lock); - spin_lock_init(&cli->cl_write_page_hist.oh_lock); - spin_lock_init(&cli->cl_read_offset_hist.oh_lock); - spin_lock_init(&cli->cl_write_offset_hist.oh_lock); + lprocfs_oh_init(&cli->cl_read_rpc_hist, -1); + lprocfs_oh_init(&cli->cl_write_rpc_hist, -1); + lprocfs_oh_init(&cli->cl_read_page_hist, -1); + lprocfs_oh_init(&cli->cl_write_page_hist, -1); + lprocfs_oh_init(&cli->cl_read_offset_hist, -1); + lprocfs_oh_init(&cli->cl_write_offset_hist, -1); /* lru for osc. */ CFS_INIT_LIST_HEAD(&cli->cl_lru_osc); @@ -484,13 +484,21 @@ EXPORT_SYMBOL(client_obd_setup); int client_obd_cleanup(struct obd_device *obddev) { + struct client_obd *cli = &obddev->u.cli; ENTRY; ldlm_namespace_free_post(obddev->obd_namespace); obddev->obd_namespace = NULL; obd_cleanup_client_import(obddev); - LASSERT(obddev->u.cli.cl_import == NULL); + LASSERT(cli->cl_import == NULL); + + lprocfs_oh_fini(&cli->cl_read_rpc_hist); + lprocfs_oh_fini(&cli->cl_write_rpc_hist); + lprocfs_oh_fini(&cli->cl_read_page_hist); + lprocfs_oh_fini(&cli->cl_write_page_hist); + lprocfs_oh_fini(&cli->cl_read_offset_hist); + lprocfs_oh_fini(&cli->cl_write_offset_hist); ldlm_put_ref(); RETURN(0); diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 0ebc2d8..6c2495f 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -374,19 +374,11 @@ struct ra_io_arg { unsigned long ria_pages; }; -/* LL_HIST_MAX=32 causes an overflow */ -#define LL_HIST_MAX 28 -#define LL_HIST_START 12 /* buckets start at 2^12 = 4k */ #define LL_PROCESS_HIST_MAX 10 struct per_process_info { - pid_t pid; - struct obd_histogram pp_r_hist; - struct obd_histogram pp_w_hist; -}; - -/* pp_extents[LL_PROCESS_HIST_MAX] will hold the combined process info */ -struct ll_rw_extents_info { - struct per_process_info pp_extents[LL_PROCESS_HIST_MAX + 1]; + pid_t pid; + struct obd_histogram *pp_r_hist; + struct 
obd_histogram *pp_w_hist; }; #define LL_OFFSET_HIST_MAX 100 @@ -530,7 +522,7 @@ struct ll_sb_info { struct lu_site *ll_site; struct cl_device *ll_cl; /* Statistics */ - struct ll_rw_extents_info ll_rw_extents_info; + struct per_process_info ll_pp_extents[LL_PROCESS_HIST_MAX + 1]; int ll_extent_process_count; struct ll_rw_process_info ll_rw_process_info[LL_PROCESS_HIST_MAX]; unsigned int ll_offset_process_count; diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index ee642db..5c4fa98 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -67,10 +67,6 @@ extern struct address_space_operations ll_aops; extern struct address_space_operations_ext ll_aops; #endif -#ifndef log2 -#define log2(n) ffz(~(n)) -#endif - static struct ll_sb_info *ll_init_sbi(void) { struct ll_sb_info *sbi = NULL; @@ -133,10 +129,8 @@ static struct ll_sb_info *ll_init_sbi(void) for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) { /* Since these structures are not used by default, don't * allocate much space for the histograms yet */ - spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i]. - pp_r_hist.oh_lock); - spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i]. - pp_w_hist.oh_lock); + lprocfs_oh_init(&sbi->ll_pp_extents[i].pp_r_hist, 2); + lprocfs_oh_init(&sbi->ll_pp_extents[i].pp_w_hist, 2); } /* metadata statahead is enabled by default */ diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c index 18f1406..897ec72 100644 --- a/lustre/llite/lproc_llite.c +++ b/lustre/llite/lproc_llite.c @@ -1090,41 +1090,40 @@ void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi) #define pct(a,b) (b ? 
a * 100 / b : 0) -static void ll_display_extents_info(struct ll_rw_extents_info *io_extents, - struct seq_file *seq, int which) +static void ll_display_extents_info(struct per_process_info *pp_extents, + struct seq_file *seq, int which) { - unsigned long read_tot = 0, write_tot = 0, read_cum, write_cum; - unsigned long start, end, r, w; - char *unitp = "KMGTPEZY"; - int i, units = 10; - struct per_process_info *pp_info = &io_extents->pp_extents[which]; - - read_cum = 0; - write_cum = 0; - start = 0; - - for(i = 0; i < LL_HIST_MAX; i++) { - read_tot += pp_info->pp_r_hist.oh_buckets[i]; - write_tot += pp_info->pp_w_hist.oh_buckets[i]; - } + unsigned long read_tot, write_tot, read_cum, write_cum; + struct per_process_info *pp_info = &pp_extents[which]; + int i, count; + + read_tot = lprocfs_oh_sum(pp_info->pp_r_hist); + write_tot = lprocfs_oh_sum(pp_info->pp_w_hist); + spin_lock(&pp_info->pp_r_hist->oh_lock); + count = max(pp_info->pp_r_hist->oh_bucket_count, + pp_info->pp_w_hist->oh_bucket_count); + for (i = 0, read_cum = write_cum = 0; i < count; i++) { + unsigned long r, w; + + if (i < pp_info->pp_r_hist->oh_bucket_count) + r = pp_info->pp_r_hist->oh_buckets[i]; + else + r = 0; + if (i < pp_info->pp_w_hist->oh_bucket_count) + w = pp_info->pp_w_hist->oh_buckets[i]; + else + w = 0; - for(i = 0; i < LL_HIST_MAX; i++) { - r = pp_info->pp_r_hist.oh_buckets[i]; - w = pp_info->pp_w_hist.oh_buckets[i]; - read_cum += r; - write_cum += w; - end = 1 << (i + LL_HIST_START - units); - seq_printf(seq, "%4lu%c - %4lu%c%c: %14lu %4lu %4lu | " - "%14lu %4lu %4lu\n", start, *unitp, end, *unitp, - (i == LL_HIST_MAX - 1) ? 
'+' : ' ', - r, pct(r, read_tot), pct(read_cum, read_tot), - w, pct(w, write_tot), pct(write_cum, write_tot)); - start = end; - if (start == 1<<10) { - start = 1; - units += 10; - unitp++; - } + read_cum += r; + write_cum += w; + if (read_cum == 0 && write_cum == 0) + continue; + + seq_printf(seq, "%4lu: %14lu %4lu %4lu | %14lu %4lu %4lu\n", + 1UL << i, + r, pct(r, read_tot), pct(read_cum, read_tot), + w, pct(w, write_tot), pct(write_cum, write_tot)); if (read_cum == read_tot && write_cum == write_tot) break; } + spin_unlock(&pp_info->pp_r_hist->oh_lock); @@ -1132,31 +1131,30 @@ static void ll_display_extents_info(struct ll_rw_extents_info *io_extents, static int ll_rw_extents_stats_pp_seq_show(struct seq_file *seq, void *v) { - struct timeval now; - struct ll_sb_info *sbi = seq->private; - struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info; + struct timeval now; + struct ll_sb_info *sbi = seq->private; int k; do_gettimeofday(&now); if (!sbi->ll_rw_stats_on) { seq_printf(seq, "disabled\n" - "write anything in this file to activate, " - "then 0 or \"[D/d]isabled\" to deactivate\n"); - return 0; - } - seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n", - now.tv_sec, now.tv_usec); - seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write"); - seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n", - "extents", "calls", "%", "cum%", - "calls", "%", "cum%"); + "write anything in this file to activate, " + "then 0 or \"[D/d]isabled\" to deactivate\n"); + return 0; + } + seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n", + now.tv_sec, now.tv_usec); + seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write"); + seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n", + "extents", "calls", "%", "cum%", + "calls", "%", "cum%"); spin_lock(&sbi->ll_pp_extent_lock); for (k = 0; k < LL_PROCESS_HIST_MAX; k++) { - if (io_extents->pp_extents[k].pid != 0) { + if (sbi->ll_pp_extents[k].pid != 0) { seq_printf(seq, "\nPID: %d\n", - io_extents->pp_extents[k].pid); - ll_display_extents_info(io_extents, seq, 
k); + sbi->ll_pp_extents[k].pid); + ll_display_extents_info(sbi->ll_pp_extents, seq, k); } } spin_unlock(&sbi->ll_pp_extent_lock); @@ -1164,30 +1162,28 @@ static int ll_rw_extents_stats_pp_seq_show(struct seq_file *seq, void *v) } static ssize_t ll_rw_extents_stats_pp_seq_write(struct file *file, - const char *buf, size_t len, - loff_t *off) + const char *buf, size_t len, + loff_t *off) { - struct seq_file *seq = file->private_data; - struct ll_sb_info *sbi = seq->private; - struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info; - int i; - int value = 1, rc = 0; + struct seq_file *seq = file->private_data; + struct ll_sb_info *sbi = seq->private; + int i; + int value = 1, rc = 0; - rc = lprocfs_write_helper(buf, len, &value); - if (rc < 0 && (strcmp(buf, "disabled") == 0 || - strcmp(buf, "Disabled") == 0)) - value = 0; + rc = lprocfs_write_helper(buf, len, &value); + if (rc < 0 && strcasecmp(buf, "disabled") == 0) + value = 0; - if (value == 0) - sbi->ll_rw_stats_on = 0; - else - sbi->ll_rw_stats_on = 1; + if (value == 0) + sbi->ll_rw_stats_on = 0; + else + sbi->ll_rw_stats_on = 1; spin_lock(&sbi->ll_pp_extent_lock); for (i = 0; i < LL_PROCESS_HIST_MAX; i++) { - io_extents->pp_extents[i].pid = 0; - lprocfs_oh_clear(&io_extents->pp_extents[i].pp_r_hist); - lprocfs_oh_clear(&io_extents->pp_extents[i].pp_w_hist); + sbi->ll_pp_extents[i].pid = 0; + lprocfs_oh_clear(sbi->ll_pp_extents[i].pp_r_hist); + lprocfs_oh_clear(sbi->ll_pp_extents[i].pp_w_hist); } spin_unlock(&sbi->ll_pp_extent_lock); return len; @@ -1199,53 +1195,50 @@ static int ll_rw_extents_stats_seq_show(struct seq_file *seq, void *v) { struct timeval now; struct ll_sb_info *sbi = seq->private; - struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info; do_gettimeofday(&now); if (!sbi->ll_rw_stats_on) { - seq_printf(seq, "disabled\n" - "write anything in this file to activate, " - "then 0 or \"[D/d]isabled\" to deactivate\n"); - return 0; - } - seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n", - now.tv_sec, now.tv_usec); + seq_printf(seq, "disabled\n" + "write anything in this file to activate, " + "then 0 or \"[D/d]isabled\" to deactivate\n"); + return 0; + } + seq_printf(seq, "snapshot_time: %lu.%lu 
(secs.usecs)\n", - now.tv_sec, now.tv_usec); + seq_printf(seq, "disabled\n" + "write anything in this file to activate, " + "then 0 or \"[D/d]isabled\" to deactivate\n"); + return 0; + } + seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n", + now.tv_sec, now.tv_usec); - seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write"); - seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n", - "extents", "calls", "%", "cum%", - "calls", "%", "cum%"); + seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write"); + seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n", + "extents", "calls", "%", "cum%", + "calls", "%", "cum%"); spin_lock(&sbi->ll_lock); - ll_display_extents_info(io_extents, seq, LL_PROCESS_HIST_MAX); + ll_display_extents_info(sbi->ll_pp_extents, seq, LL_PROCESS_HIST_MAX); spin_unlock(&sbi->ll_lock); return 0; } static ssize_t ll_rw_extents_stats_seq_write(struct file *file, const char *buf, - size_t len, loff_t *off) + size_t len, loff_t *off) { - struct seq_file *seq = file->private_data; - struct ll_sb_info *sbi = seq->private; - struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info; - int i; - int value = 1, rc = 0; + struct seq_file *seq = file->private_data; + struct ll_sb_info *sbi = seq->private; + int i; + int value = 1, rc = 0; - rc = lprocfs_write_helper(buf, len, &value); - if (rc < 0 && (strcmp(buf, "disabled") == 0 || - strcmp(buf, "Disabled") == 0)) - value = 0; + rc = lprocfs_write_helper(buf, len, &value); + if (rc < 0 && strcasecmp(buf, "disabled") == 0) + value = 0; - if (value == 0) - sbi->ll_rw_stats_on = 0; - else - sbi->ll_rw_stats_on = 1; + if (value == 0) + sbi->ll_rw_stats_on = 0; + else + sbi->ll_rw_stats_on = 1; spin_lock(&sbi->ll_pp_extent_lock); for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) { - io_extents->pp_extents[i].pid = 0; - lprocfs_oh_clear(&io_extents->pp_extents[i].pp_r_hist); - lprocfs_oh_clear(&io_extents->pp_extents[i].pp_w_hist); + sbi->ll_pp_extents[i].pid = 0; + 
lprocfs_oh_clear(sbi->ll_pp_extents[i].pp_r_hist); + lprocfs_oh_clear(sbi->ll_pp_extents[i].pp_w_hist); } spin_unlock(&sbi->ll_pp_extent_lock); @@ -1255,49 +1248,50 @@ static ssize_t ll_rw_extents_stats_seq_write(struct file *file, const char *buf, LPROC_SEQ_FOPS(ll_rw_extents_stats); void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, - struct ll_file_data *file, loff_t pos, - size_t count, int rw) + struct ll_file_data *file, loff_t pos, + size_t count, int rw) { - int i, cur = -1; - struct ll_rw_process_info *process; - struct ll_rw_process_info *offset; - int *off_count = &sbi->ll_rw_offset_entry_count; - int *process_count = &sbi->ll_offset_process_count; - struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info; - - if(!sbi->ll_rw_stats_on) - return; - process = sbi->ll_rw_process_info; - offset = sbi->ll_rw_offset_info; + int i, cur = -1; + struct ll_rw_process_info *process; + struct ll_rw_process_info *offset; + int *off_count = &sbi->ll_rw_offset_entry_count; + int *process_count = &sbi->ll_offset_process_count; + + if (!sbi->ll_rw_stats_on) + return; + + process = sbi->ll_rw_process_info; + offset = sbi->ll_rw_offset_info; spin_lock(&sbi->ll_pp_extent_lock); - /* Extent statistics */ - for(i = 0; i < LL_PROCESS_HIST_MAX; i++) { - if(io_extents->pp_extents[i].pid == pid) { - cur = i; - break; - } - } + /* Extent statistics */ + for (i = 0; i < LL_PROCESS_HIST_MAX; i++) { + if (sbi->ll_pp_extents[i].pid == pid) { + cur = i; + break; + } + } - if (cur == -1) { - /* new process */ - sbi->ll_extent_process_count = - (sbi->ll_extent_process_count + 1) % LL_PROCESS_HIST_MAX; - cur = sbi->ll_extent_process_count; - io_extents->pp_extents[cur].pid = pid; - lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_r_hist); - lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_w_hist); - } + if (cur == -1) { + /* new process */ + sbi->ll_extent_process_count = (sbi->ll_extent_process_count + + 1) % LL_PROCESS_HIST_MAX; + cur = sbi->ll_extent_process_count; + 
sbi->ll_pp_extents[cur].pid = pid; + lprocfs_oh_clear(sbi->ll_pp_extents[cur].pp_r_hist); + lprocfs_oh_clear(sbi->ll_pp_extents[cur].pp_w_hist); + } - for(i = 0; (count >= (1 << LL_HIST_START << i)) && - (i < (LL_HIST_MAX - 1)); i++); - if (rw == 0) { - io_extents->pp_extents[cur].pp_r_hist.oh_buckets[i]++; - io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist.oh_buckets[i]++; - } else { - io_extents->pp_extents[cur].pp_w_hist.oh_buckets[i]++; - io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist.oh_buckets[i]++; - } + i = count != 0 ? fls(count) - 1 : 0; + if (rw == 0) { + lprocfs_oh_tally(&sbi->ll_pp_extents[cur].pp_r_hist, i); + lprocfs_oh_tally(&sbi->ll_pp_extents[LL_PROCESS_HIST_MAX]. + pp_r_hist, i); + } else { + lprocfs_oh_tally(&sbi->ll_pp_extents[cur].pp_w_hist, i); + lprocfs_oh_tally(&sbi->ll_pp_extents[LL_PROCESS_HIST_MAX]. + pp_w_hist, i); + } spin_unlock(&sbi->ll_pp_extent_lock); spin_lock(&sbi->ll_process_lock); diff --git a/lustre/mdt/mdt_lproc.c b/lustre/mdt/mdt_lproc.c index 0ee5148..38490de 100644 --- a/lustre/mdt/mdt_lproc.c +++ b/lustre/mdt/mdt_lproc.c @@ -84,34 +84,36 @@ #define pct(a, b) (b ? 
a * 100 / b : 0) static void display_rename_stats(struct seq_file *seq, char *name, - struct obd_histogram *hist) + struct obd_histogram *hist) { - unsigned long tot, t, cum = 0; - int i; + unsigned long tot, t, cum = 0; + int i; - tot = lprocfs_oh_sum(hist); - if (tot > 0) - seq_printf(seq, "- %-15s\n", name); - /* dir size start from 4K, start i from 10(2^10) here */ - for (i = 0; i < OBD_HIST_MAX; i++) { - t = hist->oh_buckets[i]; - cum += t; - if (cum == 0) - continue; + tot = lprocfs_oh_sum(hist); + if (tot > 0) + seq_printf(seq, "- %-15s\n", name); + /* dir size start from 4K, start i from 10(2^10) here */ + spin_lock(&hist->oh_lock); + for (i = 0; i < hist->oh_bucket_count; i++) { + t = hist->oh_buckets[i]; + cum += t; + if (cum == 0) + continue; - if (i < 10) - seq_printf(seq, "%6s%d%s", " ", 1<< i, "bytes:"); - else if (i < 20) - seq_printf(seq, "%6s%d%s", " ", 1<<(i-10), "KB:"); - else - seq_printf(seq, "%6s%d%s", " ", 1<<(i-20), "MB:"); + if (i < 10) + seq_printf(seq, "%6s%d%s", " ", 1 << i, "bytes:"); + else if (i < 20) + seq_printf(seq, "%6s%d%s", " ", 1 << (i - 10), "KB:"); + else + seq_printf(seq, "%6s%d%s", " ", 1 << (i - 20), "MB:"); - seq_printf(seq, " { sample: %3lu, pct: %3lu, cum_pct: %3lu }\n", - t, pct(t, tot), pct(cum, tot)); + seq_printf(seq, " { sample: %3lu, pct: %3lu, cum_pct: %3lu }\n", + t, pct(t, tot), pct(cum, tot)); - if (cum == tot) - break; - } + if (cum == tot) + break; + } + spin_unlock(&hist->oh_lock); } static void rename_stats_show(struct seq_file *seq, @@ -126,11 +128,11 @@ static void rename_stats_show(struct seq_file *seq, now.tv_sec, now.tv_usec); - display_rename_stats(seq, "same_dir", - &rename_stats->hist[RENAME_SAMEDIR_SIZE]); - display_rename_stats(seq, "crossdir_src", - &rename_stats->hist[RENAME_CROSSDIR_SRC_SIZE]); - display_rename_stats(seq, "crossdir_tgt", - &rename_stats->hist[RENAME_CROSSDIR_TGT_SIZE]); + display_rename_stats(seq, "same_dir", + rename_stats->ren_hist[RENAME_SAMEDIR_SIZE]); + 
display_rename_stats(seq, "crossdir_src", + rename_stats->ren_hist[RENAME_CROSSDIR_SRC_SIZE]); + display_rename_stats(seq, "crossdir_tgt", + rename_stats->ren_hist[RENAME_CROSSDIR_TGT_SIZE]); } #undef pct @@ -152,8 +154,8 @@ static ssize_t mdt_rename_stats_seq_write(struct file *file, const char *buf, int i; - for (i = 0; i < RENAME_LAST; i++) - lprocfs_oh_clear(&mdt->mdt_rename_stats.hist[i]); + for (i = 0; i < RENAME_LAST; i++) + lprocfs_oh_clear(mdt->mdt_rename_stats.ren_hist[i]); - return len; + return len; } @@ -164,7 +166,7 @@ static int lproc_mdt_attach_rename_seqstat(struct mdt_device *mdt) int i; for (i = 0; i < RENAME_LAST; i++) - spin_lock_init(&mdt->mdt_rename_stats.hist[i].oh_lock); + lprocfs_oh_init(&mdt->mdt_rename_stats.ren_hist[i], -1); return lprocfs_obd_seq_create(mdt2obd_dev(mdt), "rename_stats", 0644, &mdt_rename_stats_fops, mdt); @@ -191,13 +193,13 @@ void mdt_rename_counter_tally(struct mdt_thread_info *info, if (src == tgt) { mdt_counter_incr(req, LPROC_MDT_SAMEDIR_RENAME); - lprocfs_oh_tally_log2(&rstats->hist[RENAME_SAMEDIR_SIZE], - (unsigned int)ma->ma_attr.la_size); - return; - } + lprocfs_oh_tally_log2(&rstats->ren_hist[RENAME_SAMEDIR_SIZE], + (unsigned int)ma->ma_attr.la_size); + return; + } mdt_counter_incr(req, LPROC_MDT_CROSSDIR_RENAME); - lprocfs_oh_tally_log2(&rstats->hist[RENAME_CROSSDIR_SRC_SIZE], - (unsigned int)ma->ma_attr.la_size); + lprocfs_oh_tally_log2(&rstats->ren_hist[RENAME_CROSSDIR_SRC_SIZE], + (unsigned int)ma->ma_attr.la_size); ma->ma_need = MA_INODE; @@ -209,7 +211,7 @@ void mdt_rename_counter_tally(struct mdt_thread_info *info, return; } - lprocfs_oh_tally_log2(&rstats->hist[RENAME_CROSSDIR_TGT_SIZE], - (unsigned int)ma->ma_attr.la_size); + lprocfs_oh_tally_log2(&rstats->ren_hist[RENAME_CROSSDIR_TGT_SIZE], + (unsigned int)ma->ma_attr.la_size); } diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 973fd0b..d8f53e6 100644 --- a/lustre/obdclass/lprocfs_status.c +++ 
b/lustre/obdclass/lprocfs_status.c @@ -3273,43 +3273,157 @@ int lprocfs_obd_seq_create(struct obd_device *dev, } EXPORT_SYMBOL(lprocfs_obd_seq_create); -void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value) +/** + * Allocate and initialize an OBD histogram structure. + * + * This may grow from the initial allocation depending on how many buckets + * are needed. If @bucket_count is not positive (callers pass -1) a default + * number of buckets is allocated, otherwise @bucket_count buckets are. + */ +int lprocfs_oh_init(struct obd_histogram **ohptr, int bucket_count) { - if (value >= OBD_HIST_MAX) - value = OBD_HIST_MAX - 1; + struct obd_histogram *oh; + + LASSERT(ohptr != NULL); + + /* 12 is somewhat arbitrary, but most tables have about 12 + * entries or a multiple thereof on my system, and it leaves + * room for the rest of obd_histogram in a power-of-two alloc. */ + if (bucket_count <= 0) + bucket_count = 12; + OBD_ALLOC(oh, offsetof(typeof(*oh), oh_buckets[bucket_count])); + if (oh == NULL) + return -ENOMEM; + spin_lock_init(&oh->oh_lock); + oh->oh_bucket_count = bucket_count; + + *ohptr = oh; + + return 0; +} +EXPORT_SYMBOL(lprocfs_oh_init); + +void lprocfs_oh_fini(struct obd_histogram **ohptr) +{ + struct obd_histogram *oh; + + if (ohptr == NULL || *ohptr == NULL) + return; + + oh = *ohptr; + OBD_FREE(oh, offsetof(typeof(*oh), oh_buckets[oh->oh_bucket_count])); + *ohptr = NULL; +} +EXPORT_SYMBOL(lprocfs_oh_fini); +/** + * Increment the histogram bucket for the supplied @value. + * + * If there are not enough buckets, allocate a larger array and copy over + * the current values. This is made more tricky because "oh" also holds + * the spinlock that protects the buckets. If the array is replaced, be + * sure that any threads blocked on the old oh_lock get a chance to wake + * up and drop the lock before freeing the array. 
+ */ +void lprocfs_oh_tally(struct obd_histogram **ohptr, unsigned int value) +{ + struct obd_histogram *newoh = NULL; + struct obd_histogram *oh = *ohptr; + + /* if allocation fails, accumulate in the last bucket */ + if (unlikely(value >= oh->oh_bucket_count && + lprocfs_oh_init(&newoh, value + 16) < 0)) + value = oh->oh_bucket_count - 1; + +relock: spin_lock(&oh->oh_lock); + /* It is possible that threads blocked on the old oh_lock while it + * was replaced. Unlock and refetch the new pointer and try again. */ + if (unlikely(oh->oh_replaced)) { + spin_unlock(&oh->oh_lock); + oh = *ohptr; + goto relock; + } + + /* If a thread arrives here after allocating its own array and + * blocking on oh_lock while another thread replaced ohptr, then + * either this thread still has a larger array to be installed, + * or the new array is already big enough and newoh can be freed. */ + if (unlikely(newoh != NULL && value >= oh->oh_bucket_count)) { + spin_lock(&newoh->oh_lock); + memcpy(newoh->oh_buckets, oh->oh_buckets, + sizeof(oh->oh_buckets[0]) * oh->oh_bucket_count); + oh->oh_replaced = true; + *ohptr = newoh; + newoh = oh; + oh = *ohptr; + /* + * Other callers may be blocked on the old oh->oh_lock. + * When they get that lock they will see oh->oh_replaced set + * on the old oh and retry with the lock on newoh just stored + * into ohptr above, for which we also just grabbed the lock. + * The unlock "newoh" below is really the old "oh" since the + * pointers were just swapped above. It is done this way to + * avoid confusing static code analysis and developers, since + * it keeps the "newoh" and "oh" lock/unlock calls balanced. + */ + spin_unlock(&newoh->oh_lock); + } + oh->oh_buckets[value]++; spin_unlock(&oh->oh_lock); + + if (unlikely(newoh != NULL)) { + /* + * This might be the old "oh" being freed. In that case + * this thread needs to block until other threads holding + * the old oh_lock are finished with it. 
Grab the old lock + * again to block until other holders are finished with it. + * This assumes that spin_lock() is fair queue and once the + * lock is held below there cannot be more users. If not, + * then it might be necessary to insert a reschedule loop + * until all of the lock holders are gone. + */ + if (newoh->oh_replaced) { + spin_lock(&newoh->oh_lock); + spin_unlock(&newoh->oh_lock); + } + lprocfs_oh_fini(&newoh); + } } EXPORT_SYMBOL(lprocfs_oh_tally); -void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value) +void lprocfs_oh_tally_log2(struct obd_histogram **ohptr, unsigned int value) { unsigned int val = 0; if (likely(value != 0)) - val = min(fls(value - 1), OBD_HIST_MAX); + val = fls(value - 1); - lprocfs_oh_tally(oh, val); + lprocfs_oh_tally(ohptr, val); } EXPORT_SYMBOL(lprocfs_oh_tally_log2); unsigned long lprocfs_oh_sum(struct obd_histogram *oh) { - unsigned long ret = 0; - int i; + unsigned long ret = 0; + int i; - for (i = 0; i < OBD_HIST_MAX; i++) - ret += oh->oh_buckets[i]; - return ret; + spin_lock(&oh->oh_lock); + for (i = 0; i < oh->oh_bucket_count; i++) + ret += oh->oh_buckets[i]; + spin_unlock(&oh->oh_lock); + + return ret; } EXPORT_SYMBOL(lprocfs_oh_sum); void lprocfs_oh_clear(struct obd_histogram *oh) { spin_lock(&oh->oh_lock); - memset(oh->oh_buckets, 0, sizeof(oh->oh_buckets)); + memset(oh->oh_buckets, 0, + sizeof(oh->oh_buckets[0]) * oh->oh_bucket_count); spin_unlock(&oh->oh_lock); } EXPORT_SYMBOL(lprocfs_oh_clear); diff --git a/lustre/obdclass/obd_mount_server.c b/lustre/obdclass/obd_mount_server.c index 8c3e8f2..9e52e16 100644 --- a/lustre/obdclass/obd_mount_server.c +++ b/lustre/obdclass/obd_mount_server.c @@ -1629,7 +1629,6 @@ const struct inode_operations server_inode_operations = { .listxattr = lustre_listxattr, }; -#define log2(n) ffz(~(n)) #define LUSTRE_SUPER_MAGIC 0x0BD00BD1 static int server_fill_super_common(struct super_block *sb) diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c index 
ccfc212..e481def 100644 --- a/lustre/osc/lproc_osc.c +++ b/lustre/osc/lproc_osc.c @@ -617,7 +617,7 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v) struct obd_device *dev = seq->private; struct client_obd *cli = &dev->u.cli; unsigned long read_tot = 0, write_tot = 0, read_cum, write_cum; - int i; + int i, count; do_gettimeofday(&now); @@ -638,14 +638,23 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "pages per rpc rpcs %% cum %% |"); seq_printf(seq, " rpcs %% cum %%\n"); - read_tot = lprocfs_oh_sum(&cli->cl_read_page_hist); - write_tot = lprocfs_oh_sum(&cli->cl_write_page_hist); + read_tot = lprocfs_oh_sum(cli->cl_read_page_hist); + write_tot = lprocfs_oh_sum(cli->cl_write_page_hist); - read_cum = 0; - write_cum = 0; - for (i = 0; i < OBD_HIST_MAX; i++) { - unsigned long r = cli->cl_read_page_hist.oh_buckets[i]; - unsigned long w = cli->cl_write_page_hist.oh_buckets[i]; + spin_lock(&cli->cl_read_page_hist->oh_lock); + count = max(cli->cl_write_page_hist->oh_bucket_count, + cli->cl_read_page_hist->oh_bucket_count); + for (i = 0, read_cum = write_cum = 0; i < count; i++) { + unsigned long r, w; + + if (i < cli->cl_read_page_hist->oh_bucket_count) + r = cli->cl_read_page_hist->oh_buckets[i]; + else + r = 0; + if (i < cli->cl_write_page_hist->oh_bucket_count) + w = cli->cl_write_page_hist->oh_buckets[i]; + else + w = 0; read_cum += r; write_cum += w; @@ -657,55 +666,78 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v) if (read_cum == read_tot && write_cum == write_tot) break; } + spin_unlock(&cli->cl_read_page_hist->oh_lock); seq_printf(seq, "\n\t\t\tread\t\t\twrite\n"); seq_printf(seq, "rpcs in flight rpcs %% cum %% |"); seq_printf(seq, " rpcs %% cum %%\n"); - read_tot = lprocfs_oh_sum(&cli->cl_read_rpc_hist); - write_tot = lprocfs_oh_sum(&cli->cl_write_rpc_hist); - - read_cum = 0; - write_cum = 0; - for (i = 0; i < OBD_HIST_MAX; i++) { - unsigned long r = cli->cl_read_rpc_hist.oh_buckets[i]; - 
unsigned long w = cli->cl_write_rpc_hist.oh_buckets[i]; - read_cum += r; - write_cum += w; - seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n", - i, r, pct(r, read_tot), - pct(read_cum, read_tot), w, - pct(w, write_tot), - pct(write_cum, write_tot)); - if (read_cum == read_tot && write_cum == write_tot) - break; - } + read_tot = lprocfs_oh_sum(cli->cl_read_rpc_hist); + write_tot = lprocfs_oh_sum(cli->cl_write_rpc_hist); - seq_printf(seq, "\n\t\t\tread\t\t\twrite\n"); - seq_printf(seq, "offset rpcs %% cum %% |"); - seq_printf(seq, " rpcs %% cum %%\n"); - - read_tot = lprocfs_oh_sum(&cli->cl_read_offset_hist); - write_tot = lprocfs_oh_sum(&cli->cl_write_offset_hist); - - read_cum = 0; - write_cum = 0; - for (i = 0; i < OBD_HIST_MAX; i++) { - unsigned long r = cli->cl_read_offset_hist.oh_buckets[i]; - unsigned long w = cli->cl_write_offset_hist.oh_buckets[i]; - read_cum += r; - write_cum += w; - seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n", - (i == 0) ? 0 : 1 << (i - 1), - r, pct(r, read_tot), pct(read_cum, read_tot), - w, pct(w, write_tot), pct(write_cum, write_tot)); - if (read_cum == read_tot && write_cum == write_tot) - break; - } + spin_lock(&cli->cl_read_rpc_hist->oh_lock); + count = max(cli->cl_write_rpc_hist->oh_bucket_count, + cli->cl_read_rpc_hist->oh_bucket_count); + for (i = 0, read_cum = write_cum = 0; i < count; i++) { + unsigned long r, w; - client_obd_list_unlock(&cli->cl_loi_list_lock); + if (i < cli->cl_read_rpc_hist->oh_bucket_count) + r = cli->cl_read_rpc_hist->oh_buckets[i]; + else + r = 0; + if (i < cli->cl_write_rpc_hist->oh_bucket_count) + w = cli->cl_write_rpc_hist->oh_buckets[i]; + else + w = 0; + + read_cum += r; + write_cum += w; + seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n", + i, r, pct(r, read_tot), + pct(read_cum, read_tot), w, + pct(w, write_tot), + pct(write_cum, write_tot)); + if (read_cum == read_tot && write_cum == write_tot) + break; + } + spin_unlock(&cli->cl_read_rpc_hist->oh_lock); + + 
seq_printf(seq, "\n\t\t\tread\t\t\twrite\n"); + seq_printf(seq, "offset rpcs %% cum %% |"); + seq_printf(seq, " rpcs %% cum %%\n"); + + read_tot = lprocfs_oh_sum(cli->cl_read_offset_hist); + write_tot = lprocfs_oh_sum(cli->cl_write_offset_hist); - return 0; + spin_lock(&cli->cl_read_offset_hist->oh_lock); + count = max(cli->cl_write_offset_hist->oh_bucket_count, + cli->cl_read_offset_hist->oh_bucket_count); + for (i = 0, read_cum = write_cum = 0; i < count; i++) { + unsigned long r, w; + + if (i < cli->cl_read_offset_hist->oh_bucket_count) + r = cli->cl_read_offset_hist->oh_buckets[i]; + else + r = 0; + if (i < cli->cl_write_offset_hist->oh_bucket_count) + w = cli->cl_write_offset_hist->oh_buckets[i]; + else + w = 0; + + read_cum += r; + write_cum += w; + seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n", + (i == 0) ? 0 : 1 << (i - 1), + r, pct(r, read_tot), pct(read_cum, read_tot), + w, pct(w, write_tot), pct(write_cum, write_tot)); + if (read_cum == read_tot && write_cum == write_tot) + break; + } + spin_unlock(&cli->cl_read_offset_hist->oh_lock); + + client_obd_list_unlock(&cli->cl_loi_list_lock); + + return 0; } #undef pct @@ -716,12 +748,12 @@ static ssize_t osc_rpc_stats_seq_write(struct file *file, const char *buf, struct obd_device *dev = seq->private; struct client_obd *cli = &dev->u.cli; - lprocfs_oh_clear(&cli->cl_read_rpc_hist); - lprocfs_oh_clear(&cli->cl_write_rpc_hist); - lprocfs_oh_clear(&cli->cl_read_page_hist); - lprocfs_oh_clear(&cli->cl_write_page_hist); - lprocfs_oh_clear(&cli->cl_read_offset_hist); - lprocfs_oh_clear(&cli->cl_write_offset_hist); + lprocfs_oh_clear(cli->cl_read_rpc_hist); + lprocfs_oh_clear(cli->cl_write_rpc_hist); + lprocfs_oh_clear(cli->cl_read_page_hist); + lprocfs_oh_clear(cli->cl_write_page_hist); + lprocfs_oh_clear(cli->cl_read_offset_hist); + lprocfs_oh_clear(cli->cl_write_offset_hist); - return len; + return len; } diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index 
dc52885..1103fce 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -619,7 +619,7 @@ static inline int __osd_xattr_set(struct osd_thread_info *info, extern struct lprocfs_vars lprocfs_osd_obd_vars[]; extern struct lprocfs_vars lprocfs_osd_module_vars[]; int osd_procfs_init(struct osd_device *osd, const char *name); -int osd_procfs_fini(struct osd_device *osd); +void osd_procfs_fini(struct osd_device *osd); void osd_brw_stats_update(struct osd_device *osd, struct osd_iobuf *iobuf); #endif diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index 8600bfa..97989fd 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -127,18 +127,18 @@ static void osd_iobuf_add_page(struct osd_iobuf *iobuf, struct page *page) void osd_fini_iobuf(struct osd_device *d, struct osd_iobuf *iobuf) { - int rw = iobuf->dr_rw; - - if (iobuf->dr_elapsed_valid) { - iobuf->dr_elapsed_valid = 0; - LASSERT(iobuf->dr_dev == d); - LASSERT(iobuf->dr_frags > 0); - lprocfs_oh_tally(&d->od_brw_stats. - hist[BRW_R_DIO_FRAGS+rw], - iobuf->dr_frags); - lprocfs_oh_tally_log2(&d->od_brw_stats.hist[BRW_R_IO_TIME+rw], - iobuf->dr_elapsed); - } + int rw = iobuf->dr_rw; + + if (iobuf->dr_elapsed_valid) { + iobuf->dr_elapsed_valid = 0; + LASSERT(iobuf->dr_dev == d); + LASSERT(iobuf->dr_frags > 0); + lprocfs_oh_tally(&d->od_brw_stats.brw_hist[BRW_R_DIO_FRAGS+rw], + iobuf->dr_frags); + lprocfs_oh_tally_log2(&d->od_brw_stats. 
+ brw_hist[BRW_R_IO_TIME + rw], + iobuf->dr_elapsed); + } } #ifndef REQ_WRITE /* pre-2.6.35 */ @@ -213,21 +213,21 @@ static void dio_complete_routine(struct bio *bio, int error) static void record_start_io(struct osd_iobuf *iobuf, int size) { struct osd_device *osd = iobuf->dr_dev; - struct obd_histogram *h = osd->od_brw_stats.hist; + struct obd_histogram **oh = osd->od_brw_stats.brw_hist; iobuf->dr_frags++; atomic_inc(&iobuf->dr_numreqs); if (iobuf->dr_rw == 0) { atomic_inc(&osd->od_r_in_flight); - lprocfs_oh_tally(&h[BRW_R_RPC_HIST], + lprocfs_oh_tally(&oh[BRW_R_RPC_HIST], atomic_read(&osd->od_r_in_flight)); - lprocfs_oh_tally_log2(&h[BRW_R_DISK_IOSIZE], size); + lprocfs_oh_tally_log2(&oh[BRW_R_DISK_IOSIZE], size); } else if (iobuf->dr_rw == 1) { atomic_inc(&osd->od_w_in_flight); - lprocfs_oh_tally(&h[BRW_W_RPC_HIST], + lprocfs_oh_tally(&oh[BRW_W_RPC_HIST], atomic_read(&osd->od_w_in_flight)); - lprocfs_oh_tally_log2(&h[BRW_W_DISK_IOSIZE], size); + lprocfs_oh_tally_log2(&oh[BRW_W_DISK_IOSIZE], size); } else { LBUG(); } diff --git a/lustre/osd-ldiskfs/osd_lproc.c b/lustre/osd-ldiskfs/osd_lproc.c index b9b4e3d..3780ca5 100644 --- a/lustre/osd-ldiskfs/osd_lproc.c +++ b/lustre/osd-ldiskfs/osd_lproc.c @@ -49,78 +49,96 @@ void osd_brw_stats_update(struct osd_device *osd, struct osd_iobuf *iobuf) { - struct brw_stats *s = &osd->od_brw_stats; - unsigned long *last_block = NULL; - struct page **pages = iobuf->dr_pages; - struct page *last_page = NULL; - unsigned long discont_pages = 0; - unsigned long discont_blocks = 0; - unsigned long *blocks = iobuf->dr_blocks; - int i, nr_pages = iobuf->dr_npages; - int blocks_per_page; - int rw = iobuf->dr_rw; - - if (unlikely(nr_pages == 0)) - return; + struct brw_stats *brw_stats = &osd->od_brw_stats; + unsigned long *last_block = NULL; + struct page **pages = iobuf->dr_pages; + struct page *last_page = NULL; + unsigned long discont_pages = 0; + unsigned long discont_blocks = 0; + unsigned long *blocks = iobuf->dr_blocks; + int i, 
nr_pages = iobuf->dr_npages; + int blocks_per_page; + int rw = iobuf->dr_rw; + + if (unlikely(nr_pages == 0)) + return; blocks_per_page = PAGE_CACHE_SIZE >> osd_sb(osd)->s_blocksize_bits; - lprocfs_oh_tally_log2(&s->hist[BRW_R_PAGES+rw], nr_pages); - - while (nr_pages-- > 0) { - if (last_page && (*pages)->index != (last_page->index + 1)) - discont_pages++; - last_page = *pages; - pages++; - for (i = 0; i < blocks_per_page; i++) { - if (last_block && *blocks != (*last_block + 1)) - discont_blocks++; - last_block = blocks++; - } - } - - lprocfs_oh_tally(&s->hist[BRW_R_DISCONT_PAGES+rw], discont_pages); - lprocfs_oh_tally(&s->hist[BRW_R_DISCONT_BLOCKS+rw], discont_blocks); + lprocfs_oh_tally_log2(&brw_stats->brw_hist[BRW_R_PAGES + rw], nr_pages); + + while (nr_pages-- > 0) { + if (last_page && (*pages)->index != (last_page->index + 1)) + discont_pages++; + last_page = *pages; + pages++; + for (i = 0; i < blocks_per_page; i++) { + if (last_block && *blocks != (*last_block + 1)) + discont_blocks++; + last_block = blocks++; + } + } + + lprocfs_oh_tally(&brw_stats->brw_hist[BRW_R_DISCONT_PAGES + rw], + discont_pages); + lprocfs_oh_tally(&brw_stats->brw_hist[BRW_R_DISCONT_BLOCKS + rw], + discont_blocks); } #define pct(a, b) (b ? 
a * 100 / b : 0) static void display_brw_stats(struct seq_file *seq, char *name, char *units, - struct obd_histogram *read, struct obd_histogram *write, int scale) + struct obd_histogram *read, + struct obd_histogram *write, int scale) { - unsigned long read_tot, write_tot, r, w, read_cum = 0, write_cum = 0; - int i; - - seq_printf(seq, "\n%26s read | write\n", " "); - seq_printf(seq, "%-22s %-5s %% cum %% | %-11s %% cum %%\n", - name, units, units); - - read_tot = lprocfs_oh_sum(read); - write_tot = lprocfs_oh_sum(write); - for (i = 0; i < OBD_HIST_MAX; i++) { - r = read->oh_buckets[i]; - w = write->oh_buckets[i]; - read_cum += r; - write_cum += w; - if (read_cum == 0 && write_cum == 0) - continue; - - if (!scale) - seq_printf(seq, "%u", i); - else if (i < 10) - seq_printf(seq, "%u", scale << i); - else if (i < 20) - seq_printf(seq, "%uK", scale << (i-10)); - else - seq_printf(seq, "%uM", scale << (i-20)); - - seq_printf(seq, ":\t\t%10lu %3lu %3lu | %4lu %3lu %3lu\n", - r, pct(r, read_tot), pct(read_cum, read_tot), - w, pct(w, write_tot), pct(write_cum, write_tot)); - - if (read_cum == read_tot && write_cum == write_tot) - break; - } + unsigned long read_tot, write_tot, read_cum = 0, write_cum = 0; + int count; + int i; + + seq_printf(seq, "\n%26s read | write\n", " "); + seq_printf(seq, "%-22s %-5s %% cum %% | %-11s %% cum %%\n", + name, units, units); + + read_tot = lprocfs_oh_sum(read); + write_tot = lprocfs_oh_sum(write); + + spin_lock(&read->oh_lock); + count = max(read->oh_bucket_count, write->oh_bucket_count); + for (i = 0; i < count; i++) { + unsigned long r, w; + + if (i < read->oh_bucket_count) + r = read->oh_buckets[i]; + else + r = 0; + + if (i < write->oh_bucket_count) + w = write->oh_buckets[i]; + else + w = 0; + + read_cum += r; + write_cum += w; + if (read_cum == 0 && write_cum == 0) + continue; + + if (!scale) + seq_printf(seq, "%u", i); + else if (i < 10) + seq_printf(seq, "%u", scale << i); + else if (i < 20) + seq_printf(seq, "%uK", scale << (i 
- 10)); + else + seq_printf(seq, "%uM", scale << (i - 20)); + + seq_printf(seq, ":\t\t%10lu %3lu %3lu | %4lu %3lu %3lu\n", + r, pct(r, read_tot), pct(read_cum, read_tot), + w, pct(w, write_tot), pct(write_cum, write_tot)); + + if (read_cum == read_tot && write_cum == write_tot) + break; + } + spin_unlock(&read->oh_lock); } static void brw_stats_show(struct seq_file *seq, struct brw_stats *brw_stats) @@ -132,33 +150,33 @@ static void brw_stats_show(struct seq_file *seq, struct brw_stats *brw_stats) seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n", now.tv_sec, now.tv_usec); - display_brw_stats(seq, "pages per bulk r/w", "rpcs", - &brw_stats->hist[BRW_R_PAGES], - &brw_stats->hist[BRW_W_PAGES], 1); + display_brw_stats(seq, "pages per bulk r/w", "rpcs", + brw_stats->brw_hist[BRW_R_PAGES], + brw_stats->brw_hist[BRW_W_PAGES], 1); - display_brw_stats(seq, "discontiguous pages", "rpcs", - &brw_stats->hist[BRW_R_DISCONT_PAGES], - &brw_stats->hist[BRW_W_DISCONT_PAGES], 0); + display_brw_stats(seq, "discontiguous pages", "rpcs", + brw_stats->brw_hist[BRW_R_DISCONT_PAGES], + brw_stats->brw_hist[BRW_W_DISCONT_PAGES], 0); - display_brw_stats(seq, "discontiguous blocks", "rpcs", - &brw_stats->hist[BRW_R_DISCONT_BLOCKS], - &brw_stats->hist[BRW_W_DISCONT_BLOCKS], 0); + display_brw_stats(seq, "discontiguous blocks", "rpcs", + brw_stats->brw_hist[BRW_R_DISCONT_BLOCKS], + brw_stats->brw_hist[BRW_W_DISCONT_BLOCKS], 0); - display_brw_stats(seq, "disk fragmented I/Os", "ios", - &brw_stats->hist[BRW_R_DIO_FRAGS], - &brw_stats->hist[BRW_W_DIO_FRAGS], 0); + display_brw_stats(seq, "disk fragmented I/Os", "ios", + brw_stats->brw_hist[BRW_R_DIO_FRAGS], + brw_stats->brw_hist[BRW_W_DIO_FRAGS], 0); - display_brw_stats(seq, "disk I/Os in flight", "ios", - &brw_stats->hist[BRW_R_RPC_HIST], - &brw_stats->hist[BRW_W_RPC_HIST], 0); + display_brw_stats(seq, "disk I/Os in flight", "ios", + brw_stats->brw_hist[BRW_R_RPC_HIST], + brw_stats->brw_hist[BRW_W_RPC_HIST], 0); display_brw_stats(seq, "I/O 
time (1/1000s)", "ios", - &brw_stats->hist[BRW_R_IO_TIME], - &brw_stats->hist[BRW_W_IO_TIME], 1000 / HZ); + brw_stats->brw_hist[BRW_R_IO_TIME], + brw_stats->brw_hist[BRW_W_IO_TIME], 1000 / HZ); - display_brw_stats(seq, "disk I/O size", "ios", - &brw_stats->hist[BRW_R_DISK_IOSIZE], - &brw_stats->hist[BRW_W_DISK_IOSIZE], 1); + display_brw_stats(seq, "disk I/O size", "ios", + brw_stats->brw_hist[BRW_R_DISK_IOSIZE], + brw_stats->brw_hist[BRW_W_DISK_IOSIZE], 1); } #undef pct @@ -175,66 +193,69 @@ static int osd_brw_stats_seq_show(struct seq_file *seq, void *v) static ssize_t osd_brw_stats_seq_write(struct file *file, const char *buf, size_t len, loff_t *off) { - struct seq_file *seq = file->private_data; - struct osd_device *osd = seq->private; - int i; + struct seq_file *seq = file->private_data; + struct osd_device *osd = seq->private; + int i; - for (i = 0; i < BRW_LAST; i++) - lprocfs_oh_clear(&osd->od_brw_stats.hist[i]); + for (i = 0; i < BRW_LAST; i++) + lprocfs_oh_clear(osd->od_brw_stats.brw_hist[i]); - return len; + return len; } LPROC_SEQ_FOPS(osd_brw_stats); static int osd_stats_init(struct osd_device *osd) { - int i, result; - ENTRY; - - for (i = 0; i < BRW_LAST; i++) - spin_lock_init(&osd->od_brw_stats.hist[i].oh_lock); - - osd->od_stats = lprocfs_alloc_stats(LPROC_OSD_LAST, 0); - if (osd->od_stats != NULL) { - result = lprocfs_register_stats(osd->od_proc_entry, "stats", - osd->od_stats); - if (result) - GOTO(out, result); - - lprocfs_counter_init(osd->od_stats, LPROC_OSD_GET_PAGE, - LPROCFS_CNTR_AVGMINMAX|LPROCFS_CNTR_STDDEV, - "get_page", "usec"); - lprocfs_counter_init(osd->od_stats, LPROC_OSD_NO_PAGE, - LPROCFS_CNTR_AVGMINMAX, - "get_page_failures", "num"); - lprocfs_counter_init(osd->od_stats, LPROC_OSD_CACHE_ACCESS, - LPROCFS_CNTR_AVGMINMAX, - "cache_access", "pages"); - lprocfs_counter_init(osd->od_stats, LPROC_OSD_CACHE_HIT, - LPROCFS_CNTR_AVGMINMAX, - "cache_hit", "pages"); - lprocfs_counter_init(osd->od_stats, LPROC_OSD_CACHE_MISS, - 
LPROCFS_CNTR_AVGMINMAX, - "cache_miss", "pages"); + int i, result; + ENTRY; + + for (i = 0; i < BRW_LAST; i++) { + result = lprocfs_oh_init(&osd->od_brw_stats.brw_hist[i], -1); + if (result < 0) + GOTO(out, result); + } + + osd->od_stats = lprocfs_alloc_stats(LPROC_OSD_LAST, 0); + if (osd->od_stats == NULL) + GOTO(out, result = -ENOMEM); + + result = lprocfs_register_stats(osd->od_proc_entry, "stats", + osd->od_stats); + if (result) + GOTO(out, result); + + lprocfs_counter_init(osd->od_stats, LPROC_OSD_GET_PAGE, + LPROCFS_CNTR_AVGMINMAX|LPROCFS_CNTR_STDDEV, + "get_page", "usec"); + lprocfs_counter_init(osd->od_stats, LPROC_OSD_NO_PAGE, + LPROCFS_CNTR_AVGMINMAX, + "get_page_failures", "num"); + lprocfs_counter_init(osd->od_stats, LPROC_OSD_CACHE_ACCESS, + LPROCFS_CNTR_AVGMINMAX, + "cache_access", "pages"); + lprocfs_counter_init(osd->od_stats, LPROC_OSD_CACHE_HIT, + LPROCFS_CNTR_AVGMINMAX, + "cache_hit", "pages"); + lprocfs_counter_init(osd->od_stats, LPROC_OSD_CACHE_MISS, + LPROCFS_CNTR_AVGMINMAX, + "cache_miss", "pages"); #if OSD_THANDLE_STATS - lprocfs_counter_init(osd->od_stats, LPROC_OSD_THANDLE_STARTING, - LPROCFS_CNTR_AVGMINMAX, - "thandle starting", "usec"); - lprocfs_counter_init(osd->od_stats, LPROC_OSD_THANDLE_OPEN, - LPROCFS_CNTR_AVGMINMAX, - "thandle open", "usec"); - lprocfs_counter_init(osd->od_stats, LPROC_OSD_THANDLE_CLOSING, - LPROCFS_CNTR_AVGMINMAX, - "thandle closing", "usec"); + lprocfs_counter_init(osd->od_stats, LPROC_OSD_THANDLE_STARTING, + LPROCFS_CNTR_AVGMINMAX, + "thandle starting", "usec"); + lprocfs_counter_init(osd->od_stats, LPROC_OSD_THANDLE_OPEN, + LPROCFS_CNTR_AVGMINMAX, + "thandle open", "usec"); + lprocfs_counter_init(osd->od_stats, LPROC_OSD_THANDLE_CLOSING, + LPROCFS_CNTR_AVGMINMAX, + "thandle closing", "usec"); #endif - result = lprocfs_seq_create(osd->od_proc_entry, "brw_stats", - 0644, &osd_brw_stats_fops, osd); - } else - result = -ENOMEM; + result = lprocfs_seq_create(osd->od_proc_entry, "brw_stats", + 0644, 
&osd_brw_stats_fops, osd); out: - RETURN(result); + RETURN(result); } int osd_procfs_init(struct osd_device *osd, const char *name) @@ -271,16 +292,25 @@ out: return rc; } -int osd_procfs_fini(struct osd_device *osd) +static void osd_stats_fini(struct osd_device *osd) { - if (osd->od_stats) + int i; + + for (i = 0; i < BRW_LAST; i++) + lprocfs_oh_fini(&osd->od_brw_stats.brw_hist[i]); + + if (osd->od_stats != NULL) lprocfs_free_stats(&osd->od_stats); +} - if (osd->od_proc_entry) { - lprocfs_remove(&osd->od_proc_entry); - osd->od_proc_entry = NULL; - } - RETURN(0); +void osd_procfs_fini(struct osd_device *osd) +{ + osd_stats_fini(osd); + + if (osd->od_proc_entry) { + lprocfs_remove(&osd->od_proc_entry); + osd->od_proc_entry = NULL; + } } static int lprocfs_osd_rd_fstype(char *page, char **start, off_t off, int count,