<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 01:29:46 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92">
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>
    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-2963] fail to create large stripe count file with -ENOSPC error</title>
                <link>https://jira.whamcloud.com/browse/LU-2963</link>
                <project id="10000" key="LU">Lustre</project>
                    <description></description>
                <environment></environment>
        <key id="17896">LU-2963</key>
            <summary>fail to create large stripe count file with -ENOSPC error</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="3" iconUrl="https://jira.whamcloud.com/images/icons/priorities/major.svg">Major</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="3">Duplicate</resolution>
                                        <assignee username="yujian">Jian Yu</assignee>
                                    <reporter username="simmonsja">James A Simmons</reporter>
                        <labels>
                    </labels>
                <created>Thu, 14 Mar 2013 10:27:40 +0000</created>
                <updated>Tue, 6 May 2014 13:51:13 +0000</updated>
                            <resolved>Tue, 6 May 2014 13:48:28 +0000</resolved>
                                    <version>Lustre 2.4.0</version>
                                                        <due></due>
                            <votes>0</votes>
                                    <watches>8</watches>
                                                                            <comments>
                            <comment id="54016" author="simmonsja" created="Thu, 14 Mar 2013 10:31:45 +0000"  >&lt;p&gt;Durning testing we did some runs creating directories of increasing stripe count. What was discovered was that around 128 stripes that the files being created would fail with a -ENOSPC no matter what size the file was. This test was also done with 1.8 clients and we saw no such problems. Also durning the runs with the 2.4 clients attempts to do a lfs getstripe on the large stripe count directory would lock up.&lt;/p&gt;</comment>
                            <comment id="54020" author="simmonsja" created="Thu, 14 Mar 2013 11:22:50 +0000"  >&lt;p&gt;He is a log from the MDS whne I attempted to use lfs getstripe and it hung.&lt;/p&gt;

&lt;p&gt;Mar  8 21:12:54 widow-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;27048.704079&amp;#93;&lt;/span&gt; LustreError: 0:0:(ldlm_lockd.c:391:waiting_locks_callback()) ### lock callback timer expired after 379s: evicting client at 3167@gni  ns: mdt-ffff880e55b20000 lock: ffff880c3f748600/0x4453d516b35bd568 lrc: 3/0,0 mode: CR/CR res: 8589939214/48084 bits 0x9 rrc: 14 type: IBT flags: 0x200000000020 nid: 3167@gni remote: 0x1f865dc509f72511 expref: 11 pid: 28552 timeout: 4321715043 lvb_type: 0&lt;br/&gt;
Mar  8 21:12:54 widow-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;27048.807514&amp;#93;&lt;/span&gt; LustreError: 0:0:(ldlm_lockd.c:391:waiting_locks_callback()) ### lock callback timer expired after 379s: evicting client at 3160@gni  ns: mdt-ffff880e55b20000 lock: ffff880c91423800/0x4453d516b35bd481 lrc: 3/0,0 mode: CR/CR res: 8589939214/48084 bits 0x9 rrc: 14 type: IBT flags: 0x200000000020 nid: 3160@gni remote: 0x13f59fd0192bc693 expref: 9 pid: 28131 timeout: 4321715043 lvb_type: 0&lt;br/&gt;
Mar  8 21:19:15 widow-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;27429.704085&amp;#93;&lt;/span&gt; LustreError: 0:0:(ldlm_lockd.c:391:waiting_locks_callback()) ### lock callback timer expired after 379s: evicting client at 4524@gni  ns: mdt-ffff880e55b20000 lock: ffff880cecfe8800/0x4453d516b35c5e97 lrc: 3/0,0 mode: CR/CR res: 8589937079/12 bits 0x9 rrc: 36 type: IBT flags: 0x200000000020 nid: 4524@gni remote: 0x3f5d4900776bce4c expref: 16 pid: 28093 timeout: 4322096096 lvb_type: 0&lt;br/&gt;
Mar  8 21:19:15 widow-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;27429.808017&amp;#93;&lt;/span&gt; LustreError: 0:0:(ldlm_lockd.c:391:waiting_locks_callback()) Skipped 11 previous similar messages&lt;br/&gt;
Mar  8 21:25:39 widow-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;27813.704166&amp;#93;&lt;/span&gt; LustreError: 49:0:(ldlm_lockd.c:391:waiting_locks_callback()) ### lock callback timer expired after 384s: evicting client at 3158@gni  ns: mdt-ffff880e55b20000 lock: ffff880b58904000/0x4453d516b35ca546 lrc: 3/0,0 mode: CR/CR res: 8589936085/14 bits 0x9 rrc: 5 type: IBT flags: 0x200000000020 nid: 3158@gni remote: 0x72cd27e494baa553 expref: 16 pid: 28882 timeout: 4322480792 lvb_type: 0&lt;br/&gt;
Mar  8 21:25:39 widow-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;27813.808394&amp;#93;&lt;/span&gt; LustreError: 49:0:(ldlm_lockd.c:391:waiting_locks_callback()) Skipped 34 previous similar messages&lt;br/&gt;
Mar  8 21:31:39 widow-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;28173.189536&amp;#93;&lt;/span&gt; Lustre: routed1-MDT0000: haven&apos;t heard from client 61445347-9977-82cd-59dd-430903b6625f (at 10.36.227.198@o2ib) in 1366 seconds. I think it&apos;s dead, and I am evicting it. exp ffff880d79545c00, cur 1362796299 expire 1362795399 last 1362794933&lt;br/&gt;
Mar  8 21:31:39 widow-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;28173.251441&amp;#93;&lt;/span&gt; Lustre: Skipped 4 previous similar messages&lt;br/&gt;
Mar  8 21:47:52 widow-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;29146.704092&amp;#93;&lt;/span&gt; LustreError: 0:0:(ldlm_lockd.c:391:waiting_locks_callback()) ### lock callback timer expired after 379s: evicting client at 1626@gni  ns: mdt-ffff880e55b20000 lock: ffff880d2c019600/0x4453d516b365e418 lrc: 3/0,0 mode: CR/CR res: 8589939214/48110 bits 0x9 rrc: 14 type: IBT flags: 0x200000000020 nid: 1626@gni remote: 0xdbdb318b1cc3700e expref: 10 pid: 29051 timeout: 4323813395 lvb_type: 0&lt;br/&gt;
Mar  8 21:47:52 widow-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;29146.809942&amp;#93;&lt;/span&gt; LustreError: 0:0:(ldlm_lockd.c:391:waiting_locks_callback()) Skipped 3 previous similar messages&lt;br/&gt;
Mar  8 21:54:13 widow-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;29527.704103&amp;#93;&lt;/span&gt; LustreError: 0:0:(ldlm_lockd.c:391:waiting_locks_callback()) ### lock callback timer expired after 379s: evicting client at 4582@gni  ns: mdt-ffff880e55b20000 lock: ffff880b80052400/0x4453d516b3666b48 lrc: 3/0,0 mode: CR/CR res: 8589937079/16 bits 0x9 rrc: 37 type: IBT flags: 0x200000000020 nid: 4582@gni remote: 0xbe8a1154e07f5e4f expref: 14 pid: 28963 timeout: 4324194184 lvb_type: 0&lt;br/&gt;
Mar  8 21:54:13 widow-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;29527.810409&amp;#93;&lt;/span&gt; LustreError: 0:0:(ldlm_lockd.c:391:waiting_locks_callback()) Skipped 12 previous similar messages&lt;br/&gt;
Mar  8 22:01:07 widow-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;29941.704087&amp;#93;&lt;/span&gt; LustreError: 0:0:(ldlm_lockd.c:391:waiting_locks_callback()) ### lock callback timer expired after 414s: evicting client at 4581@gni  ns: mdt-ffff880e55b20000 lock: ffff880c3f697400/0x4453d516b366b0bc lrc: 3/0,0 mode: CR/CR res: 8589944660/2 bits 0x9 rrc: 11 type: IBT flags: 0x200000000020 nid: 4581@gni remote: 0x5cd35be174c032ea expref: 9 pid: 28346 timeout: 4324608334 lvb_type: 0&lt;br/&gt;
Mar  8 22:01:07 widow-mds1 kernel: &lt;span class=&quot;error&quot;&gt;&amp;#91;29941.810653&amp;#93;&lt;/span&gt; LustreError: 0:0:(ldlm_lockd.c:391:waiting_locks_callback()) Skipped 35 previous similar messages&lt;/p&gt;</comment>
                            <comment id="54152" author="pjones" created="Fri, 15 Mar 2013 19:41:27 +0000"  >&lt;p&gt;Minh&lt;/p&gt;

&lt;p&gt;Could you please see whether you are able to reproduce this?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="54257" author="mdiep" created="Mon, 18 Mar 2013 15:46:19 +0000"  >&lt;p&gt;Hi James,&lt;/p&gt;

&lt;p&gt;You said you ran create directory with increasing stripe count. Then you said at around 128 strips, &quot;files&quot; create failed. Did you try to create files under those directories after the directories created?&lt;/p&gt;</comment>
                            <comment id="54375" author="mdiep" created="Tue, 19 Mar 2013 15:39:12 +0000"  >&lt;p&gt;Due to &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2845&quot; title=&quot;NULL pointer deref in osp_precreate_thread()&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2845&quot;&gt;&lt;del&gt;LU-2845&lt;/del&gt;&lt;/a&gt; I haven&apos;t been able to setup wide-striping cluster with 200 osts from 2 oss&lt;/p&gt;</comment>
                            <comment id="54473" author="mdiep" created="Wed, 20 Mar 2013 15:54:29 +0000"  >&lt;p&gt;Hi James,&lt;/p&gt;

&lt;p&gt;I have setup 2 oss with 300 ost each but could not reproduce the problem you are seeing. Could you please try the latest build on lustre-master since there have been many fixes went in lately.&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;</comment>
                            <comment id="54477" author="simmonsja" created="Wed, 20 Mar 2013 16:32:13 +0000"  >&lt;p&gt;Setting up a smaller scale test system to reproduce.&lt;/p&gt;</comment>
                            <comment id="54556" author="simmonsja" created="Thu, 21 Mar 2013 12:22:52 +0000"  >&lt;p&gt;While attempting to setup a system to reproduce this I ran into &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3004&quot; title=&quot;MGS llog choke on to much parameter data&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3004&quot;&gt;&lt;del&gt;LU-3004&lt;/del&gt;&lt;/a&gt; which is blocking me.&lt;/p&gt;</comment>
                            <comment id="54653" author="simmonsja" created="Fri, 22 Mar 2013 13:40:52 +0000"  >&lt;p&gt;Managed to get a 224 stripe count system up. So far I haven&apos;t been able to reproduce the problem. Please don&apos;t close the ticket until our next test shot in the latter part of April when we can make sure that this is fixed.&lt;/p&gt;</comment>
                            <comment id="54661" author="mdiep" created="Fri, 22 Mar 2013 14:56:29 +0000"  >&lt;p&gt;James, just curious, what is the memory size on your oss? I am hitting OOM when I tried to mount 300 OSTs, stopping at around 241&lt;/p&gt;</comment>
                            <comment id="54663" author="simmonsja" created="Fri, 22 Mar 2013 15:49:29 +0000"  >&lt;p&gt;I originally tried 448 OSTs but I also hit a OOM as well. I reduced my to 224 and that worked for me. The OSS I&apos;m working with has 16GB of ram.&lt;/p&gt;</comment>
                            <comment id="54778" author="pjones" created="Mon, 25 Mar 2013 18:08:13 +0000"  >&lt;p&gt;Dropping in priority as unable to reproduce this issue on the latest master. Will raise it in priority again if it reoccurs.&lt;/p&gt;</comment>
                            <comment id="55272" author="simmonsja" created="Tue, 2 Apr 2013 11:47:27 +0000"  >&lt;p&gt;Good news and bad news. I can now duplicate this problem with the latest 2.3.63. What is the best debug settings to track down this problem?&lt;/p&gt;</comment>
                            <comment id="55283" author="mdiep" created="Tue, 2 Apr 2013 13:45:04 +0000"  >&lt;p&gt;James, could you provide lustre debug log with debug=-1? how long does it take to produce this?&lt;/p&gt;</comment>
                            <comment id="55284" author="simmonsja" created="Tue, 2 Apr 2013 13:52:01 +0000"  >&lt;p&gt;With my IOR job less than a minute to reproduce. I&apos;m going to setup the debug script now.&lt;/p&gt;</comment>
                            <comment id="55292" author="simmonsja" created="Tue, 2 Apr 2013 14:54:11 +0000"  >&lt;p&gt;Uploaded one clients log to ftp.whamcloud.com/uploads/&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2963&quot; title=&quot;fail to create large stripe count file with -ENOSPC error&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2963&quot;&gt;&lt;del&gt;LU-2963&lt;/del&gt;&lt;/a&gt;/lustre-log.c0-0c0s7n0.txt&lt;/p&gt;

&lt;p&gt;I have more client logs if you need them.&lt;/p&gt;</comment>
                            <comment id="55300" author="mdiep" created="Tue, 2 Apr 2013 15:57:55 +0000"  >&lt;p&gt;how many oss and ost? how many client did you use for IOR? I&apos;d like to try to reproduce this in the lab&lt;/p&gt;</comment>
                            <comment id="55302" author="simmonsja" created="Tue, 2 Apr 2013 16:10:03 +0000"  >&lt;p&gt;Here are my scripts to create a file system with the config (testfs-barry-224.conf). new-build formats the file system and new-lustre-start mounts it. Hostlist is used to handle the pdsh format listing of the devices and servers. I also attached my ior job scripts I used to run my IOR job with.&lt;/p&gt;</comment>
                            <comment id="55303" author="simmonsja" created="Tue, 2 Apr 2013 16:10:58 +0000"  >&lt;p&gt;4 OSS each with 56 OST to give a total of 224 OSTs. This is a LVM setup were each OSS has 7 real OSTs. The client side I ran IOR across 18 cray computes. I attached all my setup scripts and the job script I ran with. You will need to adapt for your system.&lt;/p&gt;
</comment>
                            <comment id="55612" author="simmonsja" created="Fri, 5 Apr 2013 16:43:29 +0000"  >&lt;p&gt;Any updates?&lt;/p&gt;</comment>
                            <comment id="55629" author="mdiep" created="Fri, 5 Apr 2013 17:48:31 +0000"  >&lt;p&gt;Sorry, no. I hit &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-3102&quot; title=&quot;kernel BUG at fs/jbd2/transaction.c:1033&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-3102&quot;&gt;&lt;del&gt;LU-3102&lt;/del&gt;&lt;/a&gt; when I tried to reproduce. If it&apos;s easy for you to reproduce, could you upload servers and clients debug log (with debug=-1)?&lt;/p&gt;</comment>
                            <comment id="55720" author="simmonsja" created="Mon, 8 Apr 2013 11:34:39 +0000"  >&lt;p&gt;I have a client log uploaded already at ftp.whamcloud.com/uploads/&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2963&quot; title=&quot;fail to create large stripe count file with -ENOSPC error&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2963&quot;&gt;&lt;del&gt;LU-2963&lt;/del&gt;&lt;/a&gt;/lustre-log.c0-0c0s7n0.txt. I can get the server logs as well.&lt;/p&gt;</comment>
                            <comment id="55738" author="mdiep" created="Mon, 8 Apr 2013 14:33:57 +0000"  >&lt;p&gt;yes, please upload the server logs too. thanks&lt;/p&gt;</comment>
                            <comment id="55769" author="simmonsja" created="Mon, 8 Apr 2013 17:53:28 +0000"  >&lt;p&gt;I need to build a new large stripe count file system for other test so I will get you new logs.&lt;/p&gt;</comment>
                            <comment id="56129" author="simmonsja" created="Thu, 11 Apr 2013 18:32:22 +0000"  >&lt;p&gt;I rebased to the latest master and now I can&apos;t reproduce this bug. I have a feeling some of the layout patches that were merged the 8th fixed this issue. Please leave this ticket open until after our test shot which will take place tomorrow.&lt;/p&gt;</comment>
                            <comment id="56750" author="simmonsja" created="Mon, 22 Apr 2013 22:58:12 +0000"  >&lt;p&gt;During the last test shot we encountered this bug again. This time we got logs from the clients and servers. I uploaded all the logs to &lt;/p&gt;

&lt;p&gt;ftp.whamcloud.com/uploads/&lt;a href=&quot;https://jira.whamcloud.com/browse/LU-2963&quot; title=&quot;fail to create large stripe count file with -ENOSPC error&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-2963&quot;&gt;&lt;del&gt;LU-2963&lt;/del&gt;&lt;/a&gt;/testshot-4-12-2013&lt;/p&gt;</comment>
                            <comment id="56758" author="pjones" created="Mon, 22 Apr 2013 23:53:04 +0000"  >&lt;p&gt;Yu, Jian&lt;/p&gt;

&lt;p&gt;Could you please review this latest information from ORNL?&lt;/p&gt;

&lt;p&gt;Thanks&lt;/p&gt;

&lt;p&gt;Peter&lt;/p&gt;</comment>
                            <comment id="57144" author="adilger" created="Fri, 26 Apr 2013 17:41:56 +0000"  >&lt;p&gt;Sorry, haven&apos;t looked at the logs yet.  My gut feeling is that the -ENOSPC is being returned from the server, and either from the journal layer or from the xattrs, since these are the few places that are affected by growing stripe count.&lt;/p&gt;

&lt;p&gt;James, first question - is the &quot;large_xattr&quot; feature enabled on your MDS?  This is still not being enabled by default (see &lt;a href=&quot;http://review.whamcloud.com/4315&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/4315&lt;/a&gt;), since the patch has not been accepted upstream yet, and it makes sense to limit the feature exposure to sites that actually need it.  This can be set any time during formatting (via mkfsoptions) or after (via tune2fs)  with &quot;-O large_xattr&quot;.&lt;/p&gt;</comment>
                            <comment id="57161" author="di.wang" created="Fri, 26 Apr 2013 20:44:38 +0000"  >&lt;p&gt;James: I just checked the debug log, I did not find mds log there? Just want to confirm, the bug you hit in the last test is still &quot;-NOSPC when you try to create a file with 224 stripes?&lt;/p&gt;</comment>
                            <comment id="57186" author="yujian" created="Sat, 27 Apr 2013 11:41:52 +0000"  >&lt;p&gt;Lustre Branch: master&lt;br/&gt;
Lustre Build: &lt;a href=&quot;http://build.whamcloud.com/job/lustre-master/1441/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://build.whamcloud.com/job/lustre-master/1441/&lt;/a&gt;&lt;br/&gt;
Distro/Arch: RHEL6.4/x86_64 (kernel version: 2.6.32-358.2.1.el6)&lt;br/&gt;
Network: TCP (1GigE)&lt;br/&gt;
OSSCOUNT=4&lt;br/&gt;
OSTCOUNT=224 (with 56 OSTs per OSS)&lt;/p&gt;

&lt;p&gt;MDSOPT=&quot;--mkfsoptions=&apos;-O large_xattr&apos;&quot;&lt;/p&gt;

&lt;p&gt;The parallel-scale test iorssf passed with 224 OSTs:&lt;br/&gt;
&lt;a href=&quot;https://maloo.whamcloud.com/test_sessions/ce2253de-af21-11e2-8f8e-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sessions/ce2253de-af21-11e2-8f8e-52540035b04c&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;As per run_ior() in lustre/tests/functions.sh, &quot;$LFS setstripe $testdir -c -1&quot; was performed before running the IOR command.&lt;/p&gt;

&lt;p&gt;Another test run with MDSOPT=&quot;--mkfsoptions=&apos;-O large_xattr -J size=1024&apos;&quot; also passed:&lt;br/&gt;
&lt;a href=&quot;https://maloo.whamcloud.com/test_sessions/94998110-af57-11e2-8f8e-52540035b04c&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://maloo.whamcloud.com/test_sessions/94998110-af57-11e2-8f8e-52540035b04c&lt;/a&gt;&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;+ /usr/bin/lfs setstripe /mnt/lustre/d0.ior.ssf -c -1
+ /usr/bin/lfs getstripe -d /mnt/lustre/d0.ior.ssf
stripe_count:   -1 stripe_size:    1048576 stripe_offset:  -1 
+ /usr/bin/IOR -a POSIX -C -g -b 1g -o /mnt/lustre/d0.ior.ssf/iorData -t 4m -v -e -w -r -i 5 -k
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;More tests passed:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;# ls -l /mnt/lustre/
total 0
# lfs setstripe -c 224 /mnt/lustre/file
# lfs getstripe -i -c -s /mnt/lustre/file
lmm_stripe_count:   224
lmm_stripe_size:    1048576
lmm_stripe_offset:  133
# yes | dd bs=1024 count=1048576 of=/mnt/lustre/file
1048576+0 records in
1048576+0 records out
1073741824 bytes (1.1 GB) copied, 1288.4 s, 833 kB/s
# lfs getstripe -i -c -s /mnt/lustre/file
lmm_stripe_count:   224
lmm_stripe_size:    1048576
lmm_stripe_offset:  133

# mkdir /mnt/lustre/dir
# lfs getstripe -d /mnt/lustre/dir
stripe_count:   1 stripe_size:    1048576 stripe_offset:  -1
# lfs setstripe -c 224 /mnt/lustre/dir
# lfs getstripe -d /mnt/lustre/dir
stripe_count:   224 stripe_size:    1048576 stripe_offset:  -1
# touch /mnt/lustre/dir/file
# lfs getstripe -i -c -s /mnt/lustre/dir/file
lmm_stripe_count:   224
lmm_stripe_size:    1048576
lmm_stripe_offset:  189
# yes | dd bs=1024 count=1048576 of=/mnt/lustre/dir/file
1048576+0 records in
1048576+0 records out
1073741824 bytes (1.1 GB) copied, 1359.48 s, 790 kB/s
# lfs getstripe -i -c -s /mnt/lustre/dir/file
lmm_stripe_count:   224
lmm_stripe_size:    1048576
lmm_stripe_offset:  189
# lfs getstripe -d /mnt/lustre/dir
stripe_count:   224 stripe_size:    1048576 stripe_offset:  -1
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                            <comment id="57227" author="simmonsja" created="Mon, 29 Apr 2013 11:48:10 +0000"  >&lt;p&gt;Sorry about the confusion with this ticket. When I created this ticket for our first test shot this problem was only observed during our hero wide stripe test with 367 ost at the time. After that test shot I opened this ticket and prepared a scaling job that would create directories with powers of two stripe count. So for the second test shot we ran this scaling job to discover that the failure happened around 128 stripes which is below the old 160 stripe limit. For this last test shot run we again saw this problem not only at larger stripe count (128 stripes again) but also for single shared file that was stripe across 4 OSTs. This shared file was being written to by 18K number of nodes. So I don&apos;t think it is a general wide stripe problem we are seeing but some other issue. We thought it might of been a grant issues since the OSTs are only 250 GB in size but Oleg told me during LUG this is unlikely the case.&lt;/p&gt;

&lt;p&gt;P.S&lt;br/&gt;
        I can&apos;t seem to find the MDS ldump &lt;img class=&quot;emoticon&quot; src=&quot;https://jira.whamcloud.com/images/icons/emoticons/sad.png&quot; height=&quot;16&quot; width=&quot;16&quot; align=&quot;absmiddle&quot; alt=&quot;&quot; border=&quot;0&quot;/&gt; Will  talk to the admin tomorrow.&lt;/p&gt;</comment>
                            <comment id="57260" author="adilger" created="Mon, 29 Apr 2013 19:57:23 +0000"  >&lt;p&gt;James,&lt;br/&gt;
this ENOSPC problem may only be related to your test configuration, if there are individual OSTs that are out of space for some reason.  Creating a file with specific striping will fail if it can&apos;t allocate at least 3/4 of the requested stripes (some margin is allowed so that applications don&apos;t get failures when a small number of OSTs are offline).&lt;/p&gt;

&lt;p&gt;Is it possible that earlier in your testing that some OSTs were filled up?&lt;/p&gt;</comment>
                            <comment id="57430" author="simmonsja" created="Wed, 1 May 2013 13:13:11 +0000"  >&lt;p&gt;For the last test shot we had to reformat the file system due to the changes in the fid format. After mounting the file system I always run the large stripe job first.&lt;/p&gt;</comment>
                            <comment id="77294" author="jamesanunez" created="Tue, 18 Feb 2014 22:03:12 +0000"  >&lt;p&gt;James, &lt;/p&gt;

&lt;p&gt;Have you run this large stripe job recently and, if so, are you still seeing this problem?&lt;/p&gt;

&lt;p&gt;Thanks, &lt;br/&gt;
James&lt;/p&gt;</comment>
                            <comment id="77357" author="simmonsja" created="Wed, 19 Feb 2014 14:50:19 +0000"  >&lt;p&gt;The problem was large_xattr was not set on the MDS. That was resolved. What is not resolved is that when large stripe count is not set then the default LOV_MAX_STRIPE is not 160 but something less due to changes in the data being sent over wire.&lt;/p&gt;</comment>
                            <comment id="81386" author="jamesanunez" created="Thu, 10 Apr 2014 17:01:08 +0000"  >&lt;p&gt;James, &lt;/p&gt;

&lt;p&gt;The patch for &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4791&quot; title=&quot;lod_ah_init() ASSERTION( lc-&amp;gt;ldo_stripenr == 0 ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4791&quot;&gt;&lt;del&gt;LU-4791&lt;/del&gt;&lt;/a&gt; has landed to master and there is a b2_4 patch available that has not landed yet. If you are able, please test with the &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4791&quot; title=&quot;lod_ah_init() ASSERTION( lc-&amp;gt;ldo_stripenr == 0 ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4791&quot;&gt;&lt;del&gt;LU-4791&lt;/del&gt;&lt;/a&gt; patch and see if it fixes this issue. &lt;/p&gt;

&lt;p&gt;Thank you.&lt;/p&gt;</comment>
                            <comment id="83286" author="simmonsja" created="Tue, 6 May 2014 13:01:15 +0000"  >&lt;p&gt;Excellent news. The patch from &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-4791&quot; title=&quot;lod_ah_init() ASSERTION( lc-&amp;gt;ldo_stripenr == 0 ) failed:&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-4791&quot;&gt;&lt;del&gt;LU-4791&lt;/del&gt;&lt;/a&gt; fixes this issue. You can close this ticket.&lt;/p&gt;</comment>
                            <comment id="83292" author="pjones" created="Tue, 6 May 2014 13:48:28 +0000"  >&lt;p&gt;That is excellent news - thanks James!&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="23810">LU-4791</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="12464" name="hostlist.sh" size="2838" author="simmonsja" created="Tue, 2 Apr 2013 16:10:03 +0000"/>
                            <attachment id="12460" name="ior-lsc.pbs" size="1012" author="simmonsja" created="Tue, 2 Apr 2013 16:10:03 +0000"/>
                            <attachment id="12462" name="new-build.sh" size="4245" author="simmonsja" created="Tue, 2 Apr 2013 16:10:03 +0000"/>
                            <attachment id="12461" name="new-lustre-start.sh" size="5949" author="simmonsja" created="Tue, 2 Apr 2013 16:10:03 +0000"/>
                            <attachment id="12463" name="testfs-barry-224.conf" size="603" author="simmonsja" created="Tue, 2 Apr 2013 16:10:03 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzvldb:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>7227</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>