<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 03:30:22 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-16830] mdtest SEL jobs aborted with ENOSPC during automated FOFB testing</title>
                <link>https://jira.whamcloud.com/browse/LU-16830</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;lod_ost_alloc_rr() has 3 speed loops (speed 0, 1, 2). At speeds 0 and 1 it loops over the OSTs without waiting on objects, and at speed 2 it waits obd_timeout (60s) for OST object creation.&lt;br/&gt;
Somehow the speed 0 and 1 loops did not allocate stripes. At speed 2, the OST loop waited 4 times on OSTs, 60 seconds each, and also failed to allocate stripes. The request took 240+ seconds to complete, and as a result mdt_reint_open() finished with ENOSPC.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00000004:00080000:5.0:1681399745.413238:0:52376:0:(osp_precreate.c:1552:osp_precreate_reserve()) snx11922-OST0001-osc-MDT0000: slow creates, last=[0x100010000:0x856c7ce1:0x0], next=[0x100010000:0x856c7ce1:0x0], reserved=0, sync_changes=0, sync_rpcs_in_progress=0, status=-19
00020000:00001000:5.0:1681399745.413246:0:52376:0:(lod_qos.c:415:lod_qos_declare_object_on()) can&apos;t declare creation on #1: -110
00020000:00001000:5.0:1681399745.413269:0:52376:0:(lod_qos.c:822:lod_ost_alloc_rr()) #3 strt 4 act 0 strp 0 ary 0 idx 0
00000004:00000040:5.0:1681399745.413304:0:52376:0:(osp_precreate.c:1538:osp_precreate_reserve()) snx11922-OST0000-osc-MDT0000: Sleeping on objects
00000004:00080000:5.0:1681399806.853255:0:52376:0:(osp_precreate.c:1552:osp_precreate_reserve()) snx11922-OST0000-osc-MDT0000: slow creates, last=[0x100000000:0x86f5beb8:0x0], next=[0x100000000:0x86f5beb8:0x0], reserved=0, sync_changes=0, sync_rpcs_in_progress=0, status=-19
00020000:00001000:5.0:1681399806.853278:0:52376:0:(lod_qos.c:415:lod_qos_declare_object_on()) can&apos;t declare creation on #0: -110
00000100:00100000:5.0:1681399806.853525:0:52376:0:(service.c:2368:ptlrpc_server_handle_request()) Handled RPC req@0000000032eefb47 pname:cluuid+ref:pid:xid:nid:opc:job mdt01_013:deadcc71-76dd-42b1-be5d-3b7c0cd701f9+310925:10959:x1762034564784640:12345-104@gni4:101: Request processed in 247686653us (247686679us total) trans 0 rc 301/301
00000100:00001000:5.0:1681399806.853545:0:52376:0:(service.c:2384:ptlrpc_server_handle_request()) @@@ sent 9 early replies before finishing in 247s  req@0000000032eefb47 x1762034564784640/t0(0) o101-&amp;gt;deadcc71-76dd-42b1-be5d-3b7c0cd701f9@104@gni4:316/0 lens 984/600 e 9 to 0 dl 1681399836 ref 1 fl Complete:/0/0 rc 301/301 job:&apos;&apos;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Here are the ENOSPC logs; they do not show the -28 error code, but this really is the ENOSPC case.&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;00000004:00080000:4.0:1681509372.244328:0:96679:0:(osp_precreate.c:1552:osp_precreate_reserve()) snx11922-OST0000-osc-MDT0001: slow creates, last=[0x280000400:0x9685b1fc:0x0], next=[0x280000400:0x9685b1fc:0x0], reserved=0, sync_changes=83616, sync_rpcs_in_progress=0, status=-19
00020000:00001000:4.0:1681509372.244340:0:96679:0:(lod_qos.c:415:lod_qos_declare_object_on()) can&apos;t declare creation on #0: -110
00020000:00001000:4.0:1681509372.244345:0:96679:0:(lod_qos.c:693:lod_check_and_reserve_ost()) can&apos;t declare new object on #0: -110
00020000:00001000:4.0:1681509372.244347:0:96679:0:(lod_qos.c:822:lod_ost_alloc_rr()) #1 strt 14 act 0 strp 0 ary 2 idx 1
00000004:00001000:4.0:1681509372.244351:0:96679:0:(osp_dev.c:806:osp_statfs()) snx11922-OST0001-osc-MDT0001: 30062590209 blocks, 20088647533 free, 19785479608 avail, 4096 bsize, 114679 reserved mb low, 229359 reserved mb high, 118423424 files, 82006030 free files
00000004:00000040:4.0:1681509372.244362:0:96679:0:(osp_precreate.c:1538:osp_precreate_reserve()) snx11922-OST0001-osc-MDT0001: Sleeping on objects
00000004:00080000:4.0:1681509433.684392:0:96679:0:(osp_precreate.c:1552:osp_precreate_reserve()) snx11922-OST0001-osc-MDT0001: slow creates, last=[0x300000400:0x953923b1:0x0], next=[0x300000400:0x953923b1:0x0], reserved=0, sync_changes=76494, sync_rpcs_in_progress=0, status=-19
00020000:00001000:4.0:1681509433.684395:0:96679:0:(lod_qos.c:415:lod_qos_declare_object_on()) can&apos;t declare creation on #1: -110
00020000:00001000:4.0:1681509433.684396:0:96679:0:(lod_qos.c:693:lod_check_and_reserve_ost()) can&apos;t declare new object on #1: -110
00020000:00001000:4.0:1681509433.684397:0:96679:0:(lod_qos.c:822:lod_ost_alloc_rr()) #2 strt 12 act 0 strp 0 ary 0 idx 0
00000004:00001000:4.0:1681509433.684398:0:96679:0:(osp_dev.c:806:osp_statfs()) snx11922-OST0000-osc-MDT0001: 30062590209 blocks, 20054934974 free, 19751767042 avail, 4096 bsize, 114679 reserved mb low, 229359 reserved mb high, 118423424 files, 81874401 free files
00000004:00000040:4.0:1681509433.684401:0:96679:0:(osp_precreate.c:1538:osp_precreate_reserve()) snx11922-OST0000-osc-MDT0001: Sleeping on objects
00000004:00080000:4.0:1681509495.124293:0:96679:0:(osp_precreate.c:1552:osp_precreate_reserve()) snx11922-OST0000-osc-MDT0001: slow creates, last=[0x280000400:0x9685b1fc:0x0], next=[0x280000400:0x9685b1fc:0x0], reserved=0, sync_changes=83616, sync_rpcs_in_progress=0, status=-19
00020000:00001000:4.0:1681509495.124299:0:96679:0:(lod_qos.c:415:lod_qos_declare_object_on()) can&apos;t declare creation on #0: -110
00020000:00001000:4.0:1681509495.124301:0:96679:0:(lod_qos.c:693:lod_check_and_reserve_ost()) can&apos;t declare new object on #0: -110
00020000:00001000:4.0:1681509495.124302:0:96679:0:(lod_qos.c:822:lod_ost_alloc_rr()) #3 strt 6 act 0 strp 0 ary 2 idx 1
00000004:00001000:4.0:1681509495.124304:0:96679:0:(osp_dev.c:806:osp_statfs()) snx11922-OST0001-osc-MDT0001: 30062590209 blocks, 20088647533 free, 19785479608 avail, 4096 bsize, 114679 reserved mb low, 229359 reserved mb high, 118423424 files, 82006030 free files
00000004:00000040:4.0:1681509495.124310:0:96679:0:(osp_precreate.c:1538:osp_precreate_reserve()) snx11922-OST0001-osc-MDT0001: Sleeping on objects
00000004:00080000:4.0:1681509556.564304:0:96679:0:(osp_precreate.c:1552:osp_precreate_reserve()) snx11922-OST0001-osc-MDT0001: slow creates, last=[0x300000400:0x953923b1:0x0], next=[0x300000400:0x953923b1:0x0], reserved=0, sync_changes=76494, sync_rpcs_in_progress=0, status=-19
00020000:00001000:4.0:1681509556.564310:0:96679:0:(lod_qos.c:415:lod_qos_declare_object_on()) can&apos;t declare creation on #1: -110
00020000:00001000:4.0:1681509556.564313:0:96679:0:(lod_qos.c:693:lod_check_and_reserve_ost()) can&apos;t declare new object on #1: -110
00000004:00000040:4.0:1681509556.564324:0:96679:0:(mdt_handler.c:6369:mdt_object_free()) object free, fid = [0x2400f42e8:0xc2de:0x0]
00000004:00000040:4.0:1681509556.564342:0:96679:0:(mdt_lib.c:735:mdt_fix_reply()) Shrink to md_size = 0 cookie/acl_size = 0
00000100:00001000:4.0:1681509556.564350:0:96679:0:(import.c:1941:at_measured()) add 248 to 00000000f15f2540 time=111 v=253 (253 153 23 25)
00000100:00000040:4.0:1681509556.564352:0:96679:0:(connection.c:145:ptlrpc_connection_addref()) conn=00000000cd9dc6e2 refcount 3 to 112@gni4
00000100:00000040:4.0:1681509556.564355:0:96679:0:(niobuf.c:58:ptl_send_buf()) peer_id 12345-112@gni4
00000100:00000040:4.0:1681509556.564366:0:96679:0:(lustre_net.h:1945:ptlrpc_connection_put()) PUT conn=00000000cd9dc6e2 refcount 2 to 112@gni4
00000100:00000040:4.0:1681509556.564368:0:96679:0:(lustre_net.h:2404:ptlrpc_rqphase_move()) @@@ move request phase from Interpret to Complete  req@00000000d5ffd1e9 x1761995533613248/t0(0) o101-&amp;gt;00e2d691-47e0-4194-a67c-1aa5d855ebc6@112@gni4:456/0 lens 984/600 e 8 to 0 dl 1681509561 ref 1 fl Interpret:/0/0 rc 301/301 job:&apos;&apos;
00000100:00100000:4.0:1681509556.564373:0:96679:0:(service.c:2368:ptlrpc_server_handle_request()) Handled RPC req@00000000d5ffd1e9 pname:cluuid+ref:pid:xid:nid:opc:job mdt01_028:00e2d691-47e0-4194-a67c-1aa5d855ebc6+191645:4033:x1761995533613248:12345-112@gni4:101: Request processed in 247828965us (247829037us total) trans 0 rc 301/301
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;It looks like the lod_ost_alloc_rr() logic always generates OST indexes for allocation that are equal to the failover indexes, 0 and 1.&lt;br/&gt;
I think I&apos;m seeing a problem with object allocation in the source code.&lt;br/&gt;
With 2 out of 4 OSTs we have 12 retries; the math is not exactly right, since Lustre uses atomic_inc() % ost_count, but the estimated probability is 0.5^12 = 0.024%.&lt;/p&gt;</description>
                <environment></environment>
        <key id="76053">LU-16830</key>
            <summary>mdtest SEL jobs aborted with ENOSPC during automated FOFB testing</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="aboyko">Alexander Boyko</assignee>
                                    <reporter username="aboyko">Alexander Boyko</reporter>
                        <labels>
                            <label>patch</label>
                    </labels>
                <created>Mon, 15 May 2023 13:57:24 +0000</created>
                <updated>Thu, 28 Sep 2023 08:02:22 +0000</updated>
                            <resolved>Fri, 9 Jun 2023 12:38:55 +0000</resolved>
                                                    <fixVersion>Lustre 2.16.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>5</watches>
                                                                            <comments>
                            <comment id="372282" author="gerrit" created="Mon, 15 May 2023 14:03:24 +0000"  >&lt;p&gt;&quot;Alexander Boyko &amp;lt;alexander.boyko@hpe.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/50996&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/50996&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16830&quot; title=&quot;mdtest SEL jobs aborted with ENOSPC during automated FOFB testing&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16830&quot;&gt;&lt;del&gt;LU-16830&lt;/del&gt;&lt;/a&gt; lod: improve rr allocation&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 8df8df81b283b11ddf585ef2d95a49638ed956d3&lt;/p&gt;</comment>
                            <comment id="374934" author="gerrit" created="Fri, 9 Jun 2023 05:27:33 +0000"  >&lt;p&gt;&quot;Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&quot; merged in patch &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/50996/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/50996/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16830&quot; title=&quot;mdtest SEL jobs aborted with ENOSPC during automated FOFB testing&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16830&quot;&gt;&lt;del&gt;LU-16830&lt;/del&gt;&lt;/a&gt; lod: improve rr allocation&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: cacdaa925172d26c19ee841dd5a2a4c30afe10ce&lt;/p&gt;</comment>
                            <comment id="374977" author="pjones" created="Fri, 9 Jun 2023 12:38:55 +0000"  >&lt;p&gt;Landed for 2.16&lt;/p&gt;</comment>
                            <comment id="386170" author="gerrit" created="Fri, 15 Sep 2023 15:51:49 +0000"  >&lt;p&gt;&quot;Alexander Boyko &amp;lt;alexander.boyko@hpe.com&amp;gt;&quot; uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/52393&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/52393&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16830&quot; title=&quot;mdtest SEL jobs aborted with ENOSPC during automated FOFB testing&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16830&quot;&gt;&lt;del&gt;LU-16830&lt;/del&gt;&lt;/a&gt; lod: accurate OSTs iteration for the last speed&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 9b2c8779850d56f114af36a53110088b7e6005dc&lt;/p&gt;</comment>
                            <comment id="387508" author="gerrit" created="Thu, 28 Sep 2023 08:02:22 +0000"  >&lt;p&gt;&quot;Oleg Drokin &amp;lt;green@whamcloud.com&amp;gt;&quot; merged in patch &lt;a href=&quot;https://review.whamcloud.com/c/fs/lustre-release/+/52393/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/c/fs/lustre-release/+/52393/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-16830&quot; title=&quot;mdtest SEL jobs aborted with ENOSPC during automated FOFB testing&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-16830&quot;&gt;&lt;del&gt;LU-16830&lt;/del&gt;&lt;/a&gt; lod: accurate OSTs iteration for the last speed&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 65debc32365a179b36847d12118e2ec6e1b5805f&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                                                <inwardlinks description="is related to">
                                        <issuelink>
            <issuekey id="73709">LU-16420</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i03lef:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>