diff -r b2b6c64c9634 lustre/include/linux/obd_support.h --- a/lustre/include/linux/obd_support.h Wed Oct 27 13:50:21 2010 +0100 +++ b/lustre/include/linux/obd_support.h Mon Jun 06 00:52:33 2011 +0900 @@ -44,6 +44,8 @@ #ifdef __KERNEL__ #ifndef AUTOCONF_INCLUDED #include +#include +#include #endif #include #include @@ -102,9 +104,75 @@ # endif #endif /* __KERNEL__ */ +#ifdef HAVE_ADLER +/* Adler-32 is supported */ +#define CHECKSUM_ADLER OBD_CKSUM_ADLER +#else +#define CHECKSUM_ADLER 0 +#endif + +#ifdef X86_FEATURE_XMM4_2 +/* Call Nehalem+ CRC32C hardware acceleration instruction on individual bytes. */ +static inline __u32 crc32c_hw_byte(__u32 crc, unsigned char const *p, + size_t bytes) +{ + while (bytes--) { + __asm__ __volatile__ ( + ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1" + : "=S"(crc) + : "0"(crc), "c"(*p) + ); + p++; + } + + return crc; +} + +#if BITS_PER_LONG > 32 +#define WORD_SHIFT 3 +#define WORD_MASK 7 +#define REX "0x48, " +#else +#define WORD_SHIFT 2 +#define WORD_MASK 3 +#define REX "" +#endif + +/* Do we need to worry about unaligned input data here? */ +static inline __u32 crc32c_hw(__u32 crc, unsigned char const *p, size_t len) +{ + unsigned int words = len >> WORD_SHIFT; + unsigned int bytes = len & WORD_MASK; + long *ptmp = (long *)p; + + while (words--) { + __asm__ __volatile__( + ".byte 0xf2, " REX "0xf, 0x38, 0xf1, 0xf1;" + : "=S"(crc) + : "0"(crc), "c"(*ptmp) + ); + ptmp++; + } + + if (bytes) + crc = crc32c_hw_byte(crc, (unsigned char *)ptmp, bytes); + + return crc; +} +#else +/* We should never call this unless the CPU has previously been detected to + * support this instruction in the SSE4.2 feature set. 
b=23549 */ +static inline __u32 crc32c_hw(__u32 crc, unsigned char const *p,size_t len) +{ + LBUG(); +} +#endif + static inline __u32 init_checksum(cksum_type_t cksum_type) { switch(cksum_type) { + case OBD_CKSUM_CRC32C: + return ~0U; case OBD_CKSUM_CRC32: return ~0U; #ifdef HAVE_ADLER @@ -122,12 +190,14 @@ size_t len, cksum_type_t cksum_type) { switch(cksum_type) { - case OBD_CKSUM_CRC32: - return crc32_le(cksum, p, len); + case OBD_CKSUM_CRC32C: + return crc32c_hw(cksum, p, len); #ifdef HAVE_ADLER case OBD_CKSUM_ADLER: return zlib_adler32(cksum, p, len); #endif + case OBD_CKSUM_CRC32: + return crc32_le(cksum, p, len); default: CERROR("Unknown checksum type (%x)!!!\n", cksum_type); LBUG(); @@ -135,36 +205,85 @@ return 0; } +/* The OBD_FL_CKSUM_* flags are packed into 5 bits of o_flags, since there can + * only be a single checksum type per RPC. + * + * The OBD_CKSUM_* type bits passed in ocd_cksum_types are a 32-bit bitmask + * since they need to represent the full range of checksum algorithms that + * both the client and server can understand. + * + * In case of unsupported types/flags we fall back to CRC32 (even though + * it isn't very fast) because that is supported by all clients + * with checksums, since 1.6.5 (or earlier via patches). + * + * These flags should be listed in order of descending performance, so that + * in case multiple algorithms are supported the best one is used. 
*/ static inline obd_flag cksum_type_pack(cksum_type_t cksum_type) { - switch(cksum_type) { - case OBD_CKSUM_CRC32: - return OBD_FL_CKSUM_CRC32; + if (cksum_type & OBD_CKSUM_CRC32C) + return OBD_FL_CKSUM_CRC32C; #ifdef HAVE_ADLER - case OBD_CKSUM_ADLER: + if (cksum_type & OBD_CKSUM_ADLER) return OBD_FL_CKSUM_ADLER; #endif - default: + if (unlikely(cksum_type && !(cksum_type & OBD_CKSUM_CRC32))) CWARN("unknown cksum type %x\n", cksum_type); - } + return OBD_FL_CKSUM_CRC32; } static inline cksum_type_t cksum_type_unpack(obd_flag o_flags) { - o_flags &= OBD_FL_CKSUM_ALL; - if ((o_flags - 1) & o_flags) - CWARN("several checksum types are set: %x\n", o_flags); - if (o_flags & OBD_FL_CKSUM_ADLER) + switch (o_flags & OBD_FL_CKSUM_ALL) { + case OBD_FL_CKSUM_CRC32C: + return OBD_CKSUM_CRC32C; + case OBD_FL_CKSUM_ADLER: #ifdef HAVE_ADLER return OBD_CKSUM_ADLER; #else CWARN("checksum type is set to adler32, but adler32 is not " "supported (%x)\n", o_flags); + break; #endif + default: + break; + } + + /* 1.6.4- only supported CRC32 and didn't set o_flags */ return OBD_CKSUM_CRC32; } +/* Return a bitmask of the checksum types supported on this system. + * + * CRC32 is required for compatibility (starting with 1.6.5), + * after which we could move to Adler as the base checksum type. + * + * If hardware crc32c support is not available, it is slower than Adler, + * so don't include it, even if it could be emulated in software. b=23549 */ +static inline cksum_type_t cksum_types_supported(void) +{ + cksum_type_t ret = OBD_CKSUM_CRC32 | CHECKSUM_ADLER; + +#ifdef X86_FEATURE_XMM4_2 + if (cpu_has_xmm4_2) + ret |= OBD_CKSUM_CRC32C; +#endif + return ret; +} + +/* Select the best checksum algorithm among those supplied in the cksum_types + * input. + * + * Currently, calling cksum_type_pack() with a mask will return the fastest + * checksum type due to its ordering, but in the future we might want to + * determine this based on benchmarking the different algorithms quickly. 
+ * Caution is advised, however, since what is fastest on a single client may + * not be the fastest or most efficient algorithm on the server. */ +static inline cksum_type_t cksum_type_select(cksum_type_t cksum_types) +{ + return cksum_type_unpack(cksum_type_pack(cksum_types)); +} + #ifdef __KERNEL__ # include # include diff -r b2b6c64c9634 lustre/include/lustre/lustre_idl.h --- a/lustre/include/lustre/lustre_idl.h Wed Oct 27 13:50:21 2010 +0100 +++ b/lustre/include/lustre/lustre_idl.h Mon Jun 06 00:52:33 2011 +0900 @@ -427,6 +427,7 @@ typedef enum { OBD_CKSUM_CRC32 = 0x00000001, OBD_CKSUM_ADLER = 0x00000002, + OBD_CKSUM_CRC32C= 0x00000004, } cksum_type_t; /* @@ -488,14 +489,17 @@ OBD_FL_TRUNCLOCK = 0x00000800, /* delegate DLM locking during punch*/ OBD_FL_CKSUM_CRC32 = 0x00001000, /* CRC32 checksum type */ OBD_FL_CKSUM_ADLER = 0x00002000, /* ADLER checksum type */ - OBD_FL_CKSUM_RSVD1 = 0x00004000, /* for future cksum types */ + OBD_FL_CKSUM_CRC32C = 0x00004000, /* CRC32C checksum type */ OBD_FL_CKSUM_RSVD2 = 0x00008000, /* for future cksum types */ OBD_FL_CKSUM_RSVD3 = 0x00010000, /* for future cksum types */ OBD_FL_SHRINK_GRANT = 0x00020000, /* object shrink the grant */ OBD_FL_MMAP = 0x00040000, /* object is mmapped on the client */ OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */ - OBD_FL_CKSUM_ALL = OBD_FL_CKSUM_CRC32 | OBD_FL_CKSUM_ADLER, + /* Note that while these checksum values are currently separate bits, + * in 2.x we can actually allow all values from 1-31 if we wanted. 
*/ + OBD_FL_CKSUM_ALL = OBD_FL_CKSUM_CRC32 | OBD_FL_CKSUM_ADLER | + OBD_FL_CKSUM_CRC32C, /* mask for local-only flag, which won't be sent over network */ OBD_FL_LOCAL_MASK = 0xF0000000, diff -r b2b6c64c9634 lustre/include/obd.h --- a/lustre/include/obd.h Wed Oct 27 13:50:21 2010 +0100 +++ b/lustre/include/obd.h Mon Jun 06 00:52:33 2011 +0900 @@ -1423,20 +1423,14 @@ * Checksums */ -#ifdef HAVE_ADLER -/* Default preferred checksum algorithm to use (if supported by the server) */ -#define OSC_DEFAULT_CKSUM OBD_CKSUM_ADLER -/* Adler-32 is supported */ -#define CHECKSUM_ADLER OBD_CKSUM_ADLER -#else -#define OSC_DEFAULT_CKSUM OBD_CKSUM_CRC32 +#ifndef CHECKSUM_ADLER #define CHECKSUM_ADLER 0 #endif -#define OBD_CKSUM_ALL (OBD_CKSUM_CRC32 | CHECKSUM_ADLER) +#define OBD_CKSUM_ALL (OBD_CKSUM_CRC32 | CHECKSUM_ADLER | OBD_CKSUM_CRC32C) /* Checksum algorithm names. Must be defined in the same order as the * OBD_CKSUM_* flags. */ -#define DECLARE_CKSUM_NAME char *cksum_name[] = {"crc32", "adler"} +#define DECLARE_CKSUM_NAME char *cksum_name[] = {"crc32", "adler", "crc32c"} #endif /* __OBD_H */ diff -r b2b6c64c9634 lustre/llite/llite_lib.c --- a/lustre/llite/llite_lib.c Wed Oct 27 13:50:21 2010 +0100 +++ b/lustre/llite/llite_lib.c Mon Jun 06 00:52:33 2011 +0900 @@ -395,8 +395,7 @@ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CKSUM_ADLER_ONLY)) data->ocd_cksum_types = OBD_CKSUM_ADLER; else - /* send the list of supported checksum types */ - data->ocd_cksum_types = OBD_CKSUM_ALL; + data->ocd_cksum_types = cksum_types_supported(); } #ifdef HAVE_LRU_RESIZE_SUPPORT diff -r b2b6c64c9634 lustre/llite/rw.c --- a/lustre/llite/rw.c Wed Oct 27 13:50:21 2010 +0100 +++ b/lustre/llite/rw.c Mon Jun 06 00:52:33 2011 +0900 @@ -266,12 +266,13 @@ struct ll_async_page *llap = llap_cast_private(page); if (llap != NULL) { char *kaddr = kmap_atomic(page, KM_USER0); - llap->llap_checksum = - init_checksum(OSC_DEFAULT_CKSUM); + cksum_type_t ctype = + cksum_type_select(cksum_types_supported()); + llap->llap_checksum = 
init_checksum(ctype); llap->llap_checksum = compute_checksum(llap->llap_checksum, kaddr, CFS_PAGE_SIZE, - OSC_DEFAULT_CKSUM); + ctype); kunmap_atomic(kaddr, KM_USER0); } page_cache_release(page); @@ -903,11 +904,12 @@ out: if (unlikely(sbi->ll_flags & LL_SBI_LLITE_CHECKSUM)) { + cksum_type_t ctype = cksum_type_select(cksum_types_supported()); + char *kaddr = kmap_atomic(page, KM_USER0); __u32 csum; - char *kaddr = kmap_atomic(page, KM_USER0); - csum = init_checksum(OSC_DEFAULT_CKSUM); - csum = compute_checksum(csum, kaddr, CFS_PAGE_SIZE, - OSC_DEFAULT_CKSUM); + + csum = init_checksum(ctype); + csum = compute_checksum(csum, kaddr, CFS_PAGE_SIZE, ctype); kunmap_atomic(kaddr, KM_USER0); if (origin == LLAP_ORIGIN_READAHEAD || origin == LLAP_ORIGIN_READPAGE) { @@ -992,12 +994,13 @@ /* compare the checksum once before the page leaves llite */ if (unlikely((sbi->ll_flags & LL_SBI_LLITE_CHECKSUM) && llap->llap_checksum != 0)) { - __u32 csum; + cksum_type_t ctype = cksum_type_select(cksum_types_supported()); struct page *page = llap->llap_page; char *kaddr = kmap_atomic(page, KM_USER0); - csum = init_checksum(OSC_DEFAULT_CKSUM); - csum = compute_checksum(csum, kaddr, CFS_PAGE_SIZE, - OSC_DEFAULT_CKSUM); + __u32 csum; + + csum = init_checksum(ctype); + csum = compute_checksum(csum, kaddr, CFS_PAGE_SIZE, ctype); kunmap_atomic(kaddr, KM_USER0); if (llap->llap_checksum == csum) { CDEBUG(D_PAGE, "page %p cksum %x confirmed\n", diff -r b2b6c64c9634 lustre/obdfilter/filter.c --- a/lustre/obdfilter/filter.c Wed Oct 27 13:50:21 2010 +0100 +++ b/lustre/obdfilter/filter.c Mon Jun 06 00:52:33 2011 +0900 @@ -2390,9 +2390,10 @@ /* The client set in ocd_cksum_types the checksum types it * supports. 
We have to mask off the algorithms that we don't * support */ - if (cksum_types & OBD_CKSUM_ALL) - data->ocd_cksum_types &= OBD_CKSUM_ALL; - else + data->ocd_cksum_types &= cksum_types_supported(); + + /* 1.6.4- only support CRC32 and didn't set ocd_cksum_types */ + if (unlikely(data->ocd_cksum_types == 0)) data->ocd_cksum_types = OBD_CKSUM_CRC32; CDEBUG(D_RPCTRACE, "%s: cli %s supports cksum type %x, return " diff -r b2b6c64c9634 lustre/osc/osc_request.c --- a/lustre/osc/osc_request.c Wed Oct 27 13:50:21 2010 +0100 +++ b/lustre/osc/osc_request.c Mon Jun 06 00:52:33 2011 +0900 @@ -1382,11 +1382,8 @@ if (oa->o_valid & OBD_MD_FLFLAGS && oa->o_flags & OBD_FL_MMAP) return 1; - if (oa->o_valid & OBD_MD_FLFLAGS) - cksum_type = cksum_type_unpack(oa->o_flags); - else - cksum_type = OBD_CKSUM_CRC32; - + cksum_type = cksum_type_unpack(oa->o_valid & OBD_MD_FLFLAGS ? + oa->o_flags : 0); new_cksum = osc_checksum_bulk(nob, page_count, pga, OST_WRITE, cksum_type, pshift); @@ -1500,10 +1497,8 @@ char *router; cksum_type_t cksum_type; - if (body->oa.o_valid & OBD_MD_FLFLAGS) - cksum_type = cksum_type_unpack(body->oa.o_flags); - else - cksum_type = OBD_CKSUM_CRC32; + cksum_type = cksum_type_unpack(body->oa.o_valid &OBD_MD_FLFLAGS? 
+ body->oa.o_flags : 0); client_cksum = osc_checksum_bulk(rc, aa->aa_page_count, aa->aa_ppga, OST_READ, cksum_type, aa->aa_pshift); @@ -2274,10 +2269,10 @@ aa->aa_oa->o_flags & OBD_FL_MMAP) { rc = 0; } else { - rc = osc_brw_redo_request(request, aa); - if (rc == 0) - RETURN(0); - } + rc = osc_brw_redo_request(request, aa); + if (rc == 0) + RETURN(0); + } } cli = aa->aa_cli; diff -r b2b6c64c9634 lustre/ost/ost_handler.c --- a/lustre/ost/ost_handler.c Wed Oct 27 13:50:21 2010 +0100 +++ b/lustre/ost/ost_handler.c Mon Jun 06 00:52:33 2011 +0900 @@ -761,13 +761,12 @@ } if (body->oa.o_valid & OBD_MD_FLCKSUM) { - cksum_type_t cksum_type = OBD_CKSUM_CRC32; - - if (body->oa.o_valid & OBD_MD_FLFLAGS) - cksum_type = cksum_type_unpack(body->oa.o_flags); + cksum_type_t cksum_type = + cksum_type_unpack(body->oa.o_valid & OBD_MD_FLFLAGS ? + body->oa.o_flags : 0); body->oa.o_flags = cksum_type_pack(cksum_type); body->oa.o_valid = OBD_MD_FLCKSUM | OBD_MD_FLFLAGS; - body->oa.o_cksum = ost_checksum_bulk(desc, OST_READ, cksum_type); + body->oa.o_cksum = ost_checksum_bulk(desc, OST_READ,cksum_type); CDEBUG(D_PAGE,"checksum at read origin: %x\n",body->oa.o_cksum); } else { body->oa.o_valid = 0; @@ -991,8 +990,9 @@ /* obd_preprw clobbers oa->valid, so save what we need */ if (body->oa.o_valid & OBD_MD_FLCKSUM) { client_cksum = body->oa.o_cksum; - if (body->oa.o_valid & OBD_MD_FLFLAGS) - cksum_type = cksum_type_unpack(body->oa.o_flags); + cksum_type = + cksum_type_unpack(body->oa.o_valid & OBD_MD_FLFLAGS ? 
+ body->oa.o_flags : 0); } if (body->oa.o_valid & OBD_MD_FLFLAGS && body->oa.o_flags & OBD_FL_MMAP) mmap = 1; diff -r b2b6c64c9634 lustre/ptlrpc/import.c --- a/lustre/ptlrpc/import.c Wed Oct 27 13:50:21 2010 +0100 +++ b/lustre/ptlrpc/import.c Mon Jun 06 00:52:33 2011 +0900 @@ -1030,23 +1030,15 @@ OBD_CKSUM_ALL); cli->cl_checksum = 0; cli->cl_supp_cksum_types = OBD_CKSUM_CRC32; - cli->cl_cksum_type = OBD_CKSUM_CRC32; } else { cli->cl_supp_cksum_types = ocd->ocd_cksum_types; - - if (ocd->ocd_cksum_types & OSC_DEFAULT_CKSUM) - cli->cl_cksum_type = OSC_DEFAULT_CKSUM; - else if (ocd->ocd_cksum_types & OBD_CKSUM_ADLER) - cli->cl_cksum_type = OBD_CKSUM_ADLER; - else - cli->cl_cksum_type = OBD_CKSUM_CRC32; } } else { /* The server does not support OBD_CONNECT_CKSUM. * Enforce CRC32 for backward compatibility*/ cli->cl_supp_cksum_types = OBD_CKSUM_CRC32; - cli->cl_cksum_type = OBD_CKSUM_CRC32; } + cli->cl_cksum_type =cksum_type_select(cli->cl_supp_cksum_types); if (ocd->ocd_connect_flags & OBD_CONNECT_BRW_SIZE) { cli->cl_max_pages_per_rpc = diff -r b2b6c64c9634 lustre/ptlrpc/wiretest.c --- a/lustre/ptlrpc/wiretest.c Wed Oct 27 13:50:21 2010 +0100 +++ b/lustre/ptlrpc/wiretest.c Mon Jun 06 00:52:33 2011 +0900 @@ -722,11 +722,13 @@ CLASSERT(OBD_FL_TRUNCLOCK == (0x00000800)); CLASSERT(OBD_FL_CKSUM_CRC32 == (0x00001000)); CLASSERT(OBD_FL_CKSUM_ADLER == (0x00002000)); + CLASSERT(OBD_FL_CKSUM_CRC32C == (0x00004000)); CLASSERT(OBD_FL_SHRINK_GRANT == (0x00020000)); CLASSERT(OBD_FL_MMAP == (0x00040000)); CLASSERT(OBD_FL_RECOV_RESEND == (0x00080000)); CLASSERT(OBD_CKSUM_CRC32 == 1); CLASSERT(OBD_CKSUM_ADLER == 2); + CLASSERT(OBD_CKSUM_CRC32C == 4); /* Checks for struct lov_mds_md_v1 */ LASSERTF((int)sizeof(struct lov_mds_md_v1) == 32, " found %lld\n", diff -r b2b6c64c9634 lustre/utils/wirecheck.c --- a/lustre/utils/wirecheck.c Wed Oct 27 13:50:21 2010 +0100 +++ b/lustre/utils/wirecheck.c Mon Jun 06 00:52:33 2011 +0900 @@ -329,11 +329,13 @@ CHECK_CDEFINE(OBD_FL_TRUNCLOCK); 
CHECK_CDEFINE(OBD_FL_CKSUM_CRC32); CHECK_CDEFINE(OBD_FL_CKSUM_ADLER); + CHECK_CDEFINE(OBD_FL_CKSUM_CRC32C); CHECK_CDEFINE(OBD_FL_SHRINK_GRANT); CHECK_CDEFINE(OBD_FL_MMAP); CHECK_CDEFINE(OBD_FL_RECOV_RESEND); CHECK_CVALUE(OBD_CKSUM_CRC32); CHECK_CVALUE(OBD_CKSUM_ADLER); + CHECK_CVALUE(OBD_CKSUM_CRC32C); } static void diff -r b2b6c64c9634 lustre/utils/wiretest.c --- a/lustre/utils/wiretest.c Wed Oct 27 13:50:21 2010 +0100 +++ b/lustre/utils/wiretest.c Mon Jun 06 00:52:33 2011 +0900 @@ -720,11 +720,13 @@ CLASSERT(OBD_FL_TRUNCLOCK == (0x00000800)); CLASSERT(OBD_FL_CKSUM_CRC32 == (0x00001000)); CLASSERT(OBD_FL_CKSUM_ADLER == (0x00002000)); + CLASSERT(OBD_FL_CKSUM_CRC32C == (0x00004000)); CLASSERT(OBD_FL_SHRINK_GRANT == (0x00020000)); CLASSERT(OBD_FL_MMAP == (0x00040000)); CLASSERT(OBD_FL_RECOV_RESEND == (0x00080000)); CLASSERT(OBD_CKSUM_CRC32 == 1); CLASSERT(OBD_CKSUM_ADLER == 2); + CLASSERT(OBD_CKSUM_CRC32C == 4); /* Checks for struct lov_mds_md_v1 */ LASSERTF((int)sizeof(struct lov_mds_md_v1) == 32, " found %lld\n",