<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:50:30 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-12198] lnetctl peer show hangs for ~2600 clients, ioctl getting E2BIG</title>
                <link>https://jira.whamcloud.com/browse/LU-12198</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;command `lnetctl peer show` appears to hang, strace shows looping on:&lt;/p&gt;
&lt;div class=&quot;preformatted panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;preformattedContent panelContent&quot;&gt;
&lt;pre&gt;ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x65, 0x64, 0xb8), 0x7fffffffccf0) = -1 E2BIG (Argument list too long)
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;There are 2605 lines in /sys/kernel/debug/lnet/peers.&lt;/p&gt;

</description>
                <environment>x86 servers, 2.12 no patches, RHEL 7.6</environment>
        <key id="55440">LU-12198</key>
            <summary>lnetctl peer show hangs for ~2600 clients, ioctl getting E2BIG</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="martinetd">Dominique Martinet</assignee>
                                    <reporter username="ruth.klundt@gmail.com">Ruth Klundt</reporter>
                        <labels>
                    </labels>
                <created>Thu, 18 Apr 2019 20:19:05 +0000</created>
                <updated>Mon, 22 Jun 2020 17:46:17 +0000</updated>
                            <resolved>Tue, 25 Feb 2020 14:21:51 +0000</resolved>
                                    <version>Lustre 2.12.0</version>
                                    <fixVersion>Lustre 2.14.0</fixVersion>
                    <fixVersion>Lustre 2.12.5</fixVersion>
                                        <due></due>
                            <votes>1</votes>
                                    <watches>10</watches>
                                                                            <comments>
                            <comment id="246079" author="sharmaso" created="Fri, 19 Apr 2019 13:07:22 +0000"  >&lt;p&gt;Yes, this is because we have a limit of 1000 peers when allocating the buffer used to get the peer list:&lt;/p&gt;


&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
2996 &#160; &#160; &#160; &#160; count = 1000; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;
2997 &#160; &#160; &#160; &#160; size = count * sizeof(struct lnet_process_id);&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;
2998 &#160; &#160; &#160; &#160; list = malloc(size);
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;
009 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; LIBCFS_IOC_INIT_V2(peer_info, prcfg_hdr); &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;
3010 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; peer_info.prcfg_hdr.ioc_len = sizeof(peer_info);&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;
3011 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; peer_info.prcfg_size = size;&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;
3012 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; peer_info.prcfg_bulk = list;&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;
3013 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;
3014 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; l_errno = 0;&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;
3015 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_PEER_LIST, &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;
3016  &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &amp;amp;peer_info); &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;
3017 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; count = peer_info.prcfg_count;&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;
3018 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &lt;span class=&quot;code-keyword&quot;&gt;if&lt;/span&gt; (rc == 0)&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;
3019 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &lt;span class=&quot;code-keyword&quot;&gt;break&lt;/span&gt;;&#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160;
3020 &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; &#160; l_errno = errno;
&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;

&lt;p&gt;&#160;&lt;/p&gt;</comment>
                            <comment id="246176" author="ruth.klundt@gmail.com" created="Mon, 22 Apr 2019 20:33:01 +0000"  >&lt;p&gt;FYI, a bit further down, the code appears to retry with the count and size returned from the ioctl. Those values must not be getting across correctly, though, because the call loops indefinitely.&lt;/p&gt;</comment>
                            <comment id="254226" author="mhanafi" created="Thu, 5 Sep 2019 23:39:53 +0000"  >&lt;p&gt;We are hitting this issue on our routers.&lt;/p&gt;

&lt;p&gt;&#160;lnetctl peer show&lt;/p&gt;

&lt;p&gt;will hang and strace shows:&lt;/p&gt;

&lt;p&gt;&#160;ioctl(3, _IOC(_IOC_READ|_IOC_WRITE, 0x65, 0x64, 0xb8), 0x7fffffffe6b0) = -1 E2BIG (Argument list too long)&lt;/p&gt;</comment>
                            <comment id="263227" author="martinetd" created="Thu, 13 Feb 2020 13:25:29 +0000"  >&lt;p&gt;The problem is that the kernel does not copy the &lt;tt&gt;hdr&lt;/tt&gt; header back to userspace on error, so when &lt;tt&gt;lnet_get_peer_list&lt;/tt&gt; writes back to &lt;tt&gt;*sizep&lt;/tt&gt;, the new value stays in the kernel and never reaches lnetctl, which needs it to grow the buffer.&lt;/p&gt;</comment>
                            <comment id="263229" author="gerrit" created="Thu, 13 Feb 2020 13:37:23 +0000"  >&lt;p&gt;Dominique Martinet (dominique.martinet@cea.fr) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/37559&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/37559&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12198&quot; title=&quot;lnetctl peer show hangs for ~2600 clients, ioctl getting E2BIG&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12198&quot;&gt;&lt;del&gt;LU-12198&lt;/del&gt;&lt;/a&gt; libcfs: always copy ioctl header back to user&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 818fc691a0e29e5764bfcd65d2a1918c5369fe7c&lt;/p&gt;</comment>
                            <comment id="263969" author="gerrit" created="Tue, 25 Feb 2020 05:51:47 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/37559/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/37559/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12198&quot; title=&quot;lnetctl peer show hangs for ~2600 clients, ioctl getting E2BIG&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12198&quot;&gt;&lt;del&gt;LU-12198&lt;/del&gt;&lt;/a&gt; libcfs: always copy ioctl header back to user&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 9e02ef474f8caa833d6a1b5e0068d5323a57e8c4&lt;/p&gt;</comment>
                            <comment id="264023" author="pjones" created="Tue, 25 Feb 2020 14:21:51 +0000"  >&lt;p&gt;Landed for 2.14&lt;/p&gt;</comment>
                            <comment id="264026" author="gerrit" created="Tue, 25 Feb 2020 14:47:07 +0000"  >&lt;p&gt;Minh Diep (mdiep@whamcloud.com) uploaded a new patch: &lt;a href=&quot;https://review.whamcloud.com/37720&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/37720&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12198&quot; title=&quot;lnetctl peer show hangs for ~2600 clients, ioctl getting E2BIG&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12198&quot;&gt;&lt;del&gt;LU-12198&lt;/del&gt;&lt;/a&gt; libcfs: always copy ioctl header back to user&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_12&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: 321a757880445089a48d26acfe0554853750ca3f&lt;/p&gt;</comment>
                            <comment id="266369" author="mhanafi" created="Mon, 30 Mar 2020 23:24:18 +0000"  >&lt;p&gt;It would be nice if this could land in the next 2.12.x.&lt;/p&gt;</comment>
                            <comment id="266979" author="gerrit" created="Mon, 6 Apr 2020 21:17:17 +0000"  >&lt;p&gt;Oleg Drokin (green@whamcloud.com) merged in patch &lt;a href=&quot;https://review.whamcloud.com/37720/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;https://review.whamcloud.com/37720/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-12198&quot; title=&quot;lnetctl peer show hangs for ~2600 clients, ioctl getting E2BIG&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-12198&quot;&gt;&lt;del&gt;LU-12198&lt;/del&gt;&lt;/a&gt; libcfs: always copy ioctl header back to user&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: b2_12&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: a3c687a943233a7c5ae7e3fb906d1913b063c95c&lt;/p&gt;</comment>
                            <comment id="273476" author="joe.grund" created="Mon, 22 Jun 2020 16:46:15 +0000"  >&lt;p&gt;I&apos;m seeing something similar to this while calling both&lt;/p&gt;

&lt;p&gt;&lt;tt&gt;lnetctl export&lt;/tt&gt;&lt;/p&gt;

&lt;p&gt;and &lt;/p&gt;

&lt;p&gt;&lt;tt&gt;lnetctl peer show&lt;/tt&gt;&lt;/p&gt;

&lt;p&gt;These commands hang and running strace shows the above.&lt;/p&gt;</comment>
                            <comment id="273486" author="simmonsja" created="Mon, 22 Jun 2020 17:46:17 +0000"  >&lt;p&gt;Is this with 2.14?&#160; I suspect we are reaching the limits of using ioctls.&lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10011">
                    <name>Related</name>
                                            <outwardlinks description="is related to ">
                                        <issuelink>
            <issuekey id="46759">LU-9680</issuekey>
        </issuelink>
                            </outwardlinks>
                                                        </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|i00f1z:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>