<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:14:22 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
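For example (the full URL shape below is assumed from JIRA's standard XML issue view, not taken from this export):
https://jira.whamcloud.com/si/jira.issueviews:issue-xml/LU-1191/LU-1191.xml?field=key&field=summary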
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-1191] Test failure on test suite parallel-scale-nfsv3, subtest test_metabench.</title>
                <link>https://jira.whamcloud.com/browse/LU-1191</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;This issue was created by maloo for sarah &amp;lt;sarah@whamcloud.com&amp;gt;&lt;/p&gt;

&lt;p&gt;This issue relates to the following test suite run: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/6a3c567a-6702-11e1-a9a4-5254004bbbd3&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/6a3c567a-6702-11e1-a9a4-5254004bbbd3&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;The sub-test test_metabench failed with the following error:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;metabench failed! 1&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;Info required for matching: parallel-scale-nfsv3 metabench&lt;/p&gt;</description>
                <environment></environment>
        <key id="13456">LU-1191</key>
            <summary>Test failure on test suite parallel-scale-nfsv3, subtest test_metabench.</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="bobijam">Zhenyu Xu</assignee>
                                    <reporter username="maloo">Maloo</reporter>
                        <labels>
                    </labels>
                <created>Mon, 5 Mar 2012 17:42:15 +0000</created>
                <updated>Thu, 24 May 2012 08:56:30 +0000</updated>
                            <resolved>Thu, 24 May 2012 08:56:30 +0000</resolved>
                                    <version>Lustre 2.2.0</version>
                    <version>Lustre 1.8.8</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="30746" author="pjones" created="Fri, 9 Mar 2012 00:47:55 +0000"  >&lt;p&gt;Bobi&lt;/p&gt;

&lt;p&gt;Could you look into this one please?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="31012" author="bobijam" created="Tue, 13 Mar 2012 02:59:49 +0000"  >&lt;p&gt;Tried to manually run it on toro node for 8 times (client-6 as MDS, client-7 as OST, client-8 as client, using lustre_b2_2 build no 7 image), hadn&apos;t hit it.&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedHeader panelHeader&quot; style=&quot;border-bottom-width: 1px;&quot;&gt;&lt;b&gt;client-8 console&lt;/b&gt;&lt;/div&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;Lustre: DEBUG MARKER: -----============= acceptance-small: parallel-scale-nfsv3 ============----- Mon Mar 12 23:25:44 PDT 2012
LustreError: 9854:0:(ldlm_request.c:1170:ldlm_cli_cancel_req()) Got rc -108 from cancel RPC: canceling anyway
LustreError: 9854:0:(ldlm_request.c:1796:ldlm_cli_cancel_list()) ldlm_cli_cancel_list: -108
LustreError: 9854:0:(ldlm_request.c:1170:ldlm_cli_cancel_req()) Got rc -108 from cancel RPC: canceling anyway
LustreError: 9854:0:(ldlm_request.c:1796:ldlm_cli_cancel_list()) ldlm_cli_cancel_list: -108
Lustre: client ffff8103150efc00 umount complete
Lustre: DEBUG MARKER: only running test metabench
Lustre: DEBUG MARKER: excepting tests:
Lustre: DEBUG MARKER: == parallel-scale-nfsv3 test metabench: metabench ==================================================== 23:25:52 (1331619952)
LustreError: 152-6: Ignoring deprecated mount option &apos;acl&apos;.
Lustre: MGC10.10.4.6@tcp: Reactivating import
Lustre: Client lustre-client has started
Lustre: 11119:0:(debug.c:326:libcfs_debug_str2mask()) You are trying to use a numerical value for the mask - this will be deprecated in a future release.
Lustre: 11119:0:(debug.c:326:libcfs_debug_str2mask()) Skipped 3 previous similar messages
Lustre: DEBUG MARKER: == parallel-scale-nfsv3 parallel-scale-nfsv3.sh test complete, duration 1599 sec ===================== 23:52:23 (1331621543)
parallel-scale-nfsv3 returned 0
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedHeader panelHeader&quot; style=&quot;border-bottom-width: 1px;&quot;&gt;&lt;b&gt;auster result report&lt;/b&gt;&lt;/div&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;[root@client-8 tests]# cat /tmp/test_logs/2012-03-12/232511/results.yml
TestGroup:
    test_group: acc-sm-client-8
    testhost: client-8
    submission: Mon Mar 12 23:25:44 PDT 2012
    user_name: root

Tests:
-
        name: parallel-scale-nfsv3
        description: auster parallel-scale-nfsv3
        submission: Mon Mar 12 23:25:44 PDT 2012
        report_version: 2
        SubTests:
        -
            name: test_metabench
            status: PASS
            duration: 1589
            return_code: 0
            error:
        duration: 1599
        status: PASS
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="31158" author="pjones" created="Wed, 14 Mar 2012 14:24:42 +0000"  >&lt;p&gt;As per Oleg/Andreas, this is a test script issue. It should be fixed but is not a blocker for RC1&lt;/p&gt;</comment>
                            <comment id="31163" author="green" created="Wed, 14 Mar 2012 14:26:30 +0000"  >&lt;p&gt;This seems to be a bug in parallel-scale-nfsv3.sh&lt;/p&gt;

&lt;p&gt;In the nfsv4 script we see:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;zconf_mount_clients $lustre_client $MOUNT \
    &lt;span class=&quot;code-quote&quot;&gt;&quot;-o user_xattr,flock,32bitapi&quot;&lt;/span&gt; || \
    error &lt;span class=&quot;code-quote&quot;&gt;&quot;mount lustre on $lustre_client failed&quot;&lt;/span&gt;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;but the nfsv3 script does not pass those mount options:&lt;/p&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;zconf_mount_clients $lustre_client $MOUNT || \
    error &lt;span class=&quot;code-quote&quot;&gt;&quot;mount lustre on $lustre_client failed&quot;&lt;/span&gt;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
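<!--
A sketch of the matching nfsv3 call if it were to carry the same options as the
nfsv4 snippet above (copied from that snippet; whether 32bitapi is actually
appropriate for NFSv3 is debated in the comments below):

    zconf_mount_clients $lustre_client $MOUNT \
        "-o user_xattr,flock,32bitapi" || \
        error "mount lustre on $lustre_client failed"
-->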
                            <comment id="31194" author="mdiep" created="Wed, 14 Mar 2012 14:41:13 +0000"  >&lt;p&gt;I thought we don&apos;t need 32bitapi for nfsv3&lt;/p&gt;</comment>
                            <comment id="31213" author="adilger" created="Wed, 14 Mar 2012 15:21:20 +0000"  >&lt;p&gt;Actually, I think the opposite.  For 64-bit Lustre and NFS clients, it should be expected that 64-bit (well, 63-bit) directory cookies work correctly, and 32bitapi is (AFAIK) only needed if a Lustre 64-bit client is re-exporting to 32-bit NFS clients that cannot be detected by Lustre (e.g. 32-bit CPU or NFSv2), which is not the case here.&lt;/p&gt;

&lt;p&gt;It doesn&apos;t make sense to me why exporting a 32bit readdir cookie would &lt;em&gt;avoid&lt;/em&gt; hash collisions, rather the opposite.  The whole reason that we added 64-bit hash cookies is to avoid hash collisions that could cause readdir to get stuck.&lt;/p&gt;

&lt;p&gt;I&apos;d like to get Fan Yong&apos;s input on this issue, rather than burying it as a test issue.  Our customers are not going to know about using 32bitapi, and I suspect if they did we would begin to see the same problems that we saw in the past before moving to 64-bit hashes.&lt;/p&gt;


&lt;p&gt;The actual test failure is reported in &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/10f76e60-6d83-11e1-9174-5254004bbbd3&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/10f76e60-6d83-11e1-9174-5254004bbbd3&lt;/a&gt;:&lt;/p&gt;

&lt;p&gt;Lustre: DEBUG MARKER: == parallel-scale-nfsv3 test metabench: metabench == 14:19:31 (1331673571)&lt;br/&gt;
NFS: directory d0.metabench/TIME_CREATE_007.000 contains a readdir loop.  Please contact your server vendor.  Offending cookie: 279037783686854792&lt;/p&gt;

&lt;p&gt;(Note: it is always good practice to put in the real failure message into a bug to make searching easier, instead of only linking to the Maloo report, which is hard to search and may disappear at some point)&lt;/p&gt;

&lt;p&gt;On a related note, having wholly duplicate parallel-scale-nfsv3.sh and parallel-scale-nfsv4.sh scripts, differing only in the &quot;setup_nfs 3&quot; vs. &quot;setup_nfs 4&quot; commands, is one reason that issues like the mount options can get skewed between the two scripts (still not saying that the &quot;32bitapi&quot; mount option is &lt;em&gt;correct&lt;/em&gt;, however).  It would be less maintenance and less error prone to have a single parallel-scale-nfsv4.sh script that defaults to running NFSv4, plus a simple parallel-scale-nfsv3.sh wrapper that passes &quot;3&quot; as an argument and/or environment variable to make it run NFSv3:&lt;/p&gt;

&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;NFSVER=${NFSVER:-4}
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
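<!--
A minimal sketch of the wrapper approach proposed above, assuming the two
scripts live side by side; the file layout and variable plumbing here are
illustrative guesses, not the actual patch:

    #!/bin/bash
    # parallel-scale-nfsv3.sh: thin wrapper that reuses the NFSv4 script
    NFSVER=3
    export NFSVER
    exec bash "$(dirname "$0")/parallel-scale-nfsv4.sh" "$@"

The shared script would then default the version with NFSVER=${NFSVER:-4} and
call "setup_nfs $NFSVER" instead of hard-coding 3 or 4.
-->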
                            <comment id="31214" author="adilger" created="Wed, 14 Mar 2012 15:55:23 +0000"  >&lt;p&gt;Note that parallel-scale-nfsv4.sh is also failing metabench for the same reason as nfsv3:&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://maloo.whamcloud.com/test_sets/9d1a0296-6657-11e1-92b1-5254004bbbd3&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/9d1a0296-6657-11e1-92b1-5254004bbbd3&lt;/a&gt;&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;15:06:32:Lustre: DEBUG MARKER: == parallel-scale-nfsv4 test metabench: metabench == 15:06:31 (1330902391)
15:09:46:NFS: directory d0.metabench/TIME_CREATE_007.000 contains a readdir loop.  Please contact your server vendor.  Offending cookie: 81211680
15:09:50:NFS: directory d0.metabench/TIME_CREATE_007.000 contains a readdir loop.  Please contact your server vendor.  Offending cookie: 81211680

Proc 7: Cant remove directory [/mnt/lustre/d0.metabench/TIME_CREATE_007.000]: Directory not empty
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;I also notice that many of the &quot;pass&quot; and &quot;fail&quot; tests complete within tens of seconds and show &quot;0/0&quot; tests being run; that doesn&apos;t seem right.&lt;/p&gt;</comment>
                            <comment id="31603" author="mdiep" created="Tue, 20 Mar 2012 13:39:30 +0000"  >&lt;p&gt;using 32bitapi to mount and run nfsv3 still hit this issue&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://maloo.whamcloud.com/test_sets/51ae130e-72b1-11e1-91cb-5254004bbbd3&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/51ae130e-72b1-11e1-91cb-5254004bbbd3&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="31659" author="yong.fan" created="Wed, 21 Mar 2012 05:52:54 +0000"  >&lt;div class=&quot;panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;panelContent&quot;&gt;
&lt;p&gt;15:06:32:Lustre: DEBUG MARKER: == parallel-scale-nfsv4 test metabench: metabench == 15:06:31 (1330902391)&lt;br/&gt;
15:09:46:NFS: directory d0.metabench/TIME_CREATE_007.000 contains a readdir loop.  Please contact your server vendor.  Offending cookie: 81211680&lt;br/&gt;
15:09:50:NFS: directory d0.metabench/TIME_CREATE_007.000 contains a readdir loop.  Please contact your server vendor.  Offending cookie: 81211680&lt;/p&gt;

&lt;p&gt;Proc 7: Cant remove directory &lt;span class=&quot;error&quot;&gt;&amp;#91;/mnt/lustre/d0.metabench/TIME_CREATE_007.000&amp;#93;&lt;/span&gt;: Directory not empty&lt;/p&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;It is strange that the offending cookie &quot;81211680&quot; is too small to be a value packed by the Lustre client&apos;s readdir(). In the current Lustre implementation, the 64-bit cookie is composed of two parts: the high 32 bits hold the major hash and the low 32 bits hold the minor hash. Normally the major hash is non-zero, so a valid 64-bit cookie should be larger than the offending one. It therefore looks like an invalid cookie. Was &quot;32bitapi&quot; used in this case?&lt;/p&gt;</comment>
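<!--
A quick shell-arithmetic sketch of the cookie layout described above; the hash
values are invented for illustration:

    # 64-bit readdir cookie = (major_hash << 32) | minor_hash
    major=0x03df8a61; minor=0x5d1f0c88
    cookie=$(( (major << 32) | minor ))
    echo $cookie                 # roughly 2.8e17, well above 2^32
    echo $(( 81211680 >> 32 ))   # 0: the offending cookie has a zero major hash
-->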
                            <comment id="31750" author="yong.fan" created="Wed, 21 Mar 2012 11:33:32 +0000"  >&lt;p&gt;Minh, Is there any test failure instance with rhel5-based client (without nfs readdir() loop detect mechanism)?&lt;/p&gt;</comment>
                            <comment id="31832" author="yong.fan" created="Thu, 22 Mar 2012 02:12:35 +0000"  >&lt;p&gt;I have tested metabench against ext4-based (RHEL6 2.6.32-220.el6) nfs export. Got the same error:&lt;/p&gt;

&lt;p&gt;==============================&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;root@RHEL6-nasf-CSW tests&amp;#93;&lt;/span&gt;# metabench -w /mnt/nfs/d0.metabench -c 30400 -C -S -k&lt;br/&gt;
Metadata Test &amp;lt;no-name&amp;gt; on 03/19/2012 at 00:07:11&lt;/p&gt;

&lt;p&gt;Rank   0 process on node RHEL6-nasf-CSW&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;03/19/2012 00:07:11&amp;#93;&lt;/span&gt; Entering time_file_creation with proc_id = 0&lt;br/&gt;
File Creation Rates from Process 0&lt;br/&gt;
totfile    tot time   create rate   interval   intv time     intv rate&lt;br/&gt;
=======  ==========  ============   ========  ==========  ============&lt;br/&gt;
   1000      0.2808       3561.58       1000      0.2808       3561.58&lt;br/&gt;
   2000      0.5887       3397.54       1000      0.3079       3247.94&lt;br/&gt;
   3000      0.8639       3472.66       1000      0.2752       3633.32&lt;br/&gt;
   4000      1.1539       3466.40       1000      0.2900       3447.75&lt;br/&gt;
   5000      1.4346       3485.38       1000      0.2806       3563.41&lt;br/&gt;
   6000      1.7106       3507.59       1000      0.2760       3623.03&lt;br/&gt;
   7000      2.0276       3452.30       1000      0.3171       3154.04&lt;br/&gt;
   8000      2.3266       3438.53       1000      0.2989       3345.13&lt;br/&gt;
   9000      2.6090       3449.60       1000      0.2824       3540.73&lt;br/&gt;
  10000      2.8833       3468.22       1000      0.2743       3645.30&lt;br/&gt;
  11000      3.1637       3476.94       1000      0.2804       3566.64&lt;br/&gt;
  12000      3.4533       3474.91       1000      0.2896       3452.80&lt;br/&gt;
  13000      3.7503       3466.38       1000      0.2970       3367.09&lt;br/&gt;
  14000      3.9503       3544.01       1000      0.2000       4999.70&lt;br/&gt;
  15000      4.1063       3652.88       1000      0.1560       6409.39&lt;br/&gt;
  16000      4.2914       3728.42       1000      0.1850       5405.00&lt;br/&gt;
  17000      4.4465       3823.19       1000      0.1552       6444.05&lt;br/&gt;
  18000      4.5869       3924.23       1000      0.1403       7125.35&lt;br/&gt;
  19000      4.7420       4006.71       1000      0.1552       6444.92&lt;br/&gt;
  20000      4.8917       4088.56       1000      0.1497       6682.12&lt;br/&gt;
  21000      5.0513       4157.37       1000      0.1596       6266.88&lt;br/&gt;
  22000      5.2084       4223.94       1000      0.1571       6364.04&lt;br/&gt;
  23000      5.3560       4294.27       1000      0.1476       6776.45&lt;br/&gt;
  24000      5.5587       4317.52       1000      0.2028       4931.57&lt;br/&gt;
  25000      5.7547       4344.29       1000      0.1959       5103.73&lt;br/&gt;
  26000      6.0578       4291.99       1000      0.3031       3299.12&lt;br/&gt;
  27000      6.2432       4324.70       1000      0.1854       5393.25&lt;br/&gt;
  28000      6.4053       4371.38       1000      0.1621       6169.64&lt;br/&gt;
  29000      6.5954       4396.98       1000      0.1901       5259.34&lt;br/&gt;
  30000      6.7850       4421.54       1000      0.1895       5276.35&lt;br/&gt;
  30400      6.8420       4443.11        400      0.0571       7006.36&lt;/p&gt;

&lt;p&gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;03/19/2012 00:07:18&amp;#93;&lt;/span&gt; Leaving time_file_creation with proc_id = 0&lt;br/&gt;
Removed 1003 files in      0.171 seconds&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;03/19/2012 00:07:18&amp;#93;&lt;/span&gt; FATAL error on process 0&lt;br/&gt;
Proc 0: Cant remove directory &lt;span class=&quot;error&quot;&gt;&amp;#91;/mnt/nfs/d0.metabench/TIME_CREATE_000.000&amp;#93;&lt;/span&gt;: Directory not empty&lt;br/&gt;
==============================&lt;/p&gt;

&lt;p&gt;The dmesg output:&lt;br/&gt;
==============================&lt;br/&gt;
EXT4-fs (loop0): warning: maximal mount count reached, running e2fsck is recommended&lt;br/&gt;
EXT4-fs (loop0): mounted filesystem with ordered data mode. Opts: &lt;br/&gt;
NFSD: Using /var/lib/nfs/v4recovery as the NFSv4 state recovery directory&lt;br/&gt;
NFSD: starting 90-second grace period&lt;br/&gt;
NFS: directory d0.metabench/TIME_CREATE_000.000 contains a readdir loop.  Please contact your server vendor.  Offending cookie: 68723253&lt;br/&gt;
==============================&lt;/p&gt;

&lt;p&gt;The configuration:&lt;br/&gt;
==============================&lt;br/&gt;
&lt;span class=&quot;error&quot;&gt;&amp;#91;root@RHEL6-nasf-CSW tests&amp;#93;&lt;/span&gt;# mount&lt;br/&gt;
/dev/sda1 on / type ext4 (rw)&lt;br/&gt;
proc on /proc type proc (rw)&lt;br/&gt;
sysfs on /sys type sysfs (rw)&lt;br/&gt;
devpts on /dev/pts type devpts (rw,gid=5,mode=620)&lt;br/&gt;
tmpfs on /dev/shm type tmpfs (rw)&lt;br/&gt;
none on /proc/sys/fs/binfmt_misc type binfmt_misc (rw)&lt;br/&gt;
sunrpc on /var/lib/nfs/rpc_pipefs type rpc_pipefs (rw)&lt;br/&gt;
172.16.114.1:/Users/nasf/Work on /Work type nfs (rw,addr=172.16.114.1)&lt;br/&gt;
nfsd on /proc/fs/nfsd type nfsd (rw)&lt;br/&gt;
/dev/loop3 on /mnt/ISO type iso9660 (rw)&lt;br/&gt;
/dev/loop0 on /mnt/lustre type ext4 (rw)&lt;br/&gt;
rhel6:/mnt/lustre on /mnt/nfs type nfs (rw,addr=172.16.114.111)&lt;br/&gt;
==============================&lt;/p&gt;

&lt;p&gt;So it seems this is not a Lustre bug, but some NFS-related issue.&lt;/p&gt;</comment>
                            <comment id="31833" author="bobijam" created="Thu, 22 Mar 2012 02:25:06 +0000"  >&lt;p&gt;it looks like a kernel bug and has been fixed in 3.1 kernel, bugzilla link &lt;a href=&quot;https://bugzilla.kernel.org/show_bug.cgi?id=38572&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://bugzilla.kernel.org/show_bug.cgi?id=38572&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="31864" author="bobijam" created="Thu, 22 Mar 2012 10:36:57 +0000"  >&lt;p&gt;patch tracking at &lt;a href=&quot;http://review.whamcloud.com/2369&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/2369&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="31912" author="green" created="Thu, 22 Mar 2012 15:29:42 +0000"  >&lt;p&gt;Good catch.&lt;br/&gt;
Unfortunately it does not help us at all in our testing as we use unpatched kernels on clients.&lt;/p&gt;

&lt;p&gt;Additionally, in my testing this helps only with the 32bitapi mount option of Lustre; without the option, readdir just plain returns nothing for me. This happens with both NFSv4 and NFSv3, so I imagine we need to adjust the release notes to mention that 32bitapi is mandatory for NFS re-export at this time. Once we get to the root of it we should be able to allow mounting without it for some cases of NFS re-export.&lt;/p&gt;</comment>
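<!--
A minimal example of the client mount being described, for the re-export case;
the MGS node, filesystem name, and mount point are hypothetical:

    mount -t lustre -o user_xattr,flock,32bitapi mgs@tcp0:/lustre /mnt/lustre
-->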
                            <comment id="31918" author="pjones" created="Thu, 22 Mar 2012 16:46:36 +0000"  >&lt;p&gt;As per discussion we will document this kernel fix in the release notes so dropping as a blocker&lt;/p&gt;</comment>
                            <comment id="31936" author="yong.fan" created="Thu, 22 Mar 2012 20:33:40 +0000"  >&lt;p&gt;Most customer want to use patchless client, but there are known NFS related bugs in the kernel and if they want to reexport Lustre through NFS, I think it is reasonable to require them to patch the kernel before reexporting.&lt;/p&gt;</comment>
                            <comment id="31950" author="adilger" created="Thu, 22 Mar 2012 21:29:08 +0000"  >&lt;p&gt;Fan Yong,&lt;br/&gt;
I could agree that users should use a Lustre-patched kernel for the NFS client, if the server kernel was patched appropriately.&lt;/p&gt;

&lt;p&gt;What I&apos;m still wondering about is the problem when not using 32bitapi. Why is that not working properly?  &lt;/p&gt;</comment>
                            <comment id="31958" author="bobijam" created="Fri, 23 Mar 2012 00:12:33 +0000"  >&lt;p&gt;Found out its another kernel issue which FanYong had contributed his effort (commit 832023bffb4b493f230be901f681020caf3ed1f8)&lt;/p&gt;

&lt;div class=&quot;panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;panelHeader&quot; style=&quot;border-bottom-width: 1px;&quot;&gt;&lt;b&gt;commit message&lt;/b&gt;&lt;/div&gt;&lt;div class=&quot;panelContent&quot;&gt;
&lt;p&gt;    nfsd4: Remove check for a 32-bit cookie in nfsd4_readdir()&lt;/p&gt;

&lt;p&gt;    Fan Yong &amp;lt;yong.fan@whamcloud.com&amp;gt; noticed setting&lt;br/&gt;
    FMODE_32bithash wouldn&apos;t work with nfsd v4, as&lt;br/&gt;
    nfsd4_readdir() checks for 32 bit cookies. However, according to RFC 3530&lt;br/&gt;
    cookies have a 64 bit type and cookies are also defined as u64 in&lt;br/&gt;
    &apos;struct nfsd4_readdir&apos;. So remove the test for &amp;gt;32-bit values.&lt;/p&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="31959" author="bobijam" created="Fri, 23 Mar 2012 00:17:22 +0000"  >&lt;p&gt;without &lt;a href=&quot;http://review.whamcloud.com/2374&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/2374&lt;/a&gt;, metabench failed at the nfs server (v4, mount w/o 32bitapi).&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# metabench -w /mnt/lustre2/ -c 10000 -C -S -k
Metadata Test &amp;lt;no-name&amp;gt; on 03/23/2012 at 10:34:15

Rank   0 process on node test3

[03/23/2012 10:34:15] Entering time_file_creation with proc_id = 0
File Creation Rates from Process 0
totfile    tot time   create rate   interval   intv time     intv rate
=======  ==========  ============   ========  ==========  ============
   1000      5.3044        188.52       1000      5.3044        188.52
   2000     10.5516        189.54       1000      5.2471        190.58
   3000     15.9621        187.95       1000      5.4105        184.83
   4000     21.6978        184.35       1000      5.7357        174.35
   5000     27.0131        185.10       1000      5.3153        188.13
   6000     32.5850        184.13       1000      5.5719        179.47
   7000     38.8467        180.20       1000      6.2617        159.70
   8000     44.1792        181.08       1000      5.3325        187.53
   9000     49.5300        181.71       1000      5.3508        186.89
  10000     55.1659        181.27       1000      5.6360        177.43

[03/23/2012 10:35:10] Leaving time_file_creation with proc_id = 0
Removed 0 files in      0.020 seconds
[03/23/2012 10:35:10] FATAL error on process 0
Proc 0: Cant remove directory [/mnt/lustre2//TIME_CREATE_000.001]: Directory not empty
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;With it, the same test passed.&lt;/p&gt;</comment>
                            <comment id="32074" author="yong.fan" created="Sat, 24 Mar 2012 13:45:42 +0000"  >&lt;p&gt;I think the two kernel patches from Bobijam can explain the failure cases we met. How do you think? Although those kernel issues have been fixed in newer kernel, as the temporary solution, we can add some entries in Changlog to tell the users how to resolve the issues against current kernel.&lt;/p&gt;</comment>
                            <comment id="32086" author="adilger" created="Sat, 24 Mar 2012 18:01:22 +0000"  >&lt;p&gt;I think it would be best to patch the server kernel with the needed fixes, and if someone is doing NFS export they need to use the Lustre server kernel to do this until such a time that this problem is fixed in the vendor kernel.&lt;/p&gt;

&lt;p&gt;We should also file bugs with RHEL and SLES asking that these fixes be back-ported to their distro kernels, so that the patches can hopefully be dropped quickly.&lt;/p&gt;</comment>
                            <comment id="38621" author="yujian" created="Fri, 11 May 2012 08:40:21 +0000"  >&lt;p&gt;Lustre Build: &lt;a href=&quot;http://build.whamcloud.com/job/lustre-b1_8/194/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://build.whamcloud.com/job/lustre-b1_8/194/&lt;/a&gt;&lt;br/&gt;
Distro/Arch: RHEL5.8/x86_64(server), RHEL6.2/x86_64(client, kernel 2.6.32-220.13.1.el6)&lt;br/&gt;
Network: TCP (1GigE)&lt;br/&gt;
ENABLE_QUOTA=yes&lt;/p&gt;

&lt;p&gt;The same issue occurred: &lt;a href=&quot;https://maloo.whamcloud.com/test_sets/561adeca-9b3f-11e1-a0a0-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sets/561adeca-9b3f-11e1-a0a0-52540035b04c&lt;/a&gt;&lt;/p&gt;</comment>
                            <comment id="39321" author="pjones" created="Thu, 24 May 2012 08:56:30 +0000"  >&lt;p&gt;This should be addressed by the kernel update tracked under &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-1424&quot; title=&quot;Kernel update [RHEL6.2 2.6.32-220.17.1.el6]&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-1424&quot;&gt;&lt;del&gt;LU-1424&lt;/del&gt;&lt;/a&gt; and landed for 1.8.8, 2.1.2 and 2.3&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10010">
                    <name>Duplicate</name>
                                                                <inwardlinks description="is duplicated by">
                                        <issuelink>
            <issuekey id="13171">LU-1100</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzv59b:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>4385</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                </customfields>
    </item>
</channel>
</rss>