<!-- 
RSS generated by JIRA (9.4.14#940014-sha1:734e6822bbf0d45eff9af51f82432957f73aa32c) at Sat Feb 10 02:16:56 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary append 'field=key&field=summary' to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>Whamcloud Community JIRA</title>
    <link>https://jira.whamcloud.com</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>9.4.14</version>
        <build-number>940014</build-number>
        <build-date>05-12-2023</build-date>
    </build-info>


<item>
            <title>[LU-8368] Use kgnilnd_vzalloc() for copy buffer allocation</title>
                <link>https://jira.whamcloud.com/browse/LU-8368</link>
                <project id="10000" key="LU">Lustre</project>
                    <description>&lt;p&gt;Description of the issue from James Shimek:&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;The node was essentially out of memory. I didn&apos;t see any allocation failures, but running that close to the physical memory limit probably doesn&apos;t help it complete requests on time especially since kgnilnd is a bit finicky about memory. And it looks like instead of failing the IO we spin indefinitely due to the vmalloc in the rdma path not being able to fail.&lt;/p&gt;

&lt;p&gt;crash&amp;gt; kmem -i&lt;br/&gt;
                 PAGES        TOTAL      PERCENTAGE&lt;br/&gt;
    TOTAL MEM  8247725      31.5 GB         ----&lt;br/&gt;
         FREE    72580     283.5 MB    0% of TOTAL MEM&lt;br/&gt;
         USED  8175145      31.2 GB   99% of TOTAL MEM&lt;br/&gt;
       SHARED  5493193        21 GB   66% of TOTAL MEM&lt;br/&gt;
      BUFFERS        0            0    0% of TOTAL MEM&lt;br/&gt;
       CACHED  5881794      22.4 GB   71% of TOTAL MEM&lt;br/&gt;
         SLAB   384907       1.5 GB    4% of TOTAL MEM&lt;/p&gt;

&lt;p&gt;   TOTAL SWAP        0            0         ----&lt;br/&gt;
    SWAP USED        0            0  100% of TOTAL SWAP&lt;br/&gt;
    SWAP FREE        0            0    0% of TOTAL SWAP&lt;/p&gt;

&lt;p&gt; COMMIT LIMIT  4123862      15.7 GB         ----&lt;br/&gt;
    COMMITTED   515156         2 GB   12% of TOTAL LIMIT&lt;/p&gt;

&lt;p&gt;most of the memory seems to be being used by the kdwf system.&lt;br/&gt;
&amp;gt;crash&amp;gt; sys&lt;br/&gt;
&amp;gt;      KERNEL: service_cle_6.1.DV00-build6.1.70DV_sles_12-created20160324.cpio/DEFAULT/boot/vmlinux-3.12.51-52.39.1_1.0000.9086-cray_ari_s&lt;br/&gt;
&amp;gt;    DUMPFILE: c0-0c1s1n2-1603301901.cdump  &lt;span class=&quot;error&quot;&gt;&amp;#91;PARTIAL DUMP&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt;        CPUS: 16&lt;br/&gt;
&amp;gt;        DATE: Wed Mar 30 19:00:49 2016&lt;br/&gt;
&amp;gt;      UPTIME: 1 days, 10:15:06&lt;br/&gt;
&amp;gt;LOAD AVERAGE: 84.00, 83.84, 69.30&lt;br/&gt;
&amp;gt;       TASKS: 1398&lt;br/&gt;
&amp;gt;    NODENAME: nid00070&lt;br/&gt;
&amp;gt;     RELEASE: 3.12.51-52.39.1_1.0000.9086-cray_ari_s&lt;br/&gt;
&amp;gt;     VERSION: #1 SMP Thu Mar 10 22:13:18 UTC 2016&lt;br/&gt;
&amp;gt;     MACHINE: x86_64  (2600 Mhz)&lt;br/&gt;
&amp;gt;      MEMORY: 32 GB&lt;br/&gt;
&amp;gt;       PANIC: &quot;&quot;&lt;br/&gt;
&amp;gt;crash&amp;gt; bt 8715&lt;br/&gt;
&amp;gt;PID: 8715   TASK: ffff880442bbd180  CPU: 6   COMMAND: &quot;kgnilnd_sd_04&quot;&lt;br/&gt;
&amp;gt; #0 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff8800457734c8&amp;#93;&lt;/span&gt; schedule at ffffffff815a39e5&lt;br/&gt;
&amp;gt; #1 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773548&amp;#93;&lt;/span&gt; schedule_timeout at ffffffff815a2391&lt;br/&gt;
&amp;gt; #2 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff8800457735e0&amp;#93;&lt;/span&gt; __down_common at ffffffff815a50a6&lt;br/&gt;
&amp;gt; #3 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773640&amp;#93;&lt;/span&gt; __down at ffffffff815a5116&lt;br/&gt;
&amp;gt; #4 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773650&amp;#93;&lt;/span&gt; down at ffffffff81088581&lt;br/&gt;
&amp;gt; #5 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773670&amp;#93;&lt;/span&gt; dvsipc_send_ipc_request_common at ffffffffa053e5e5 &amp;gt;&lt;span class=&quot;error&quot;&gt;&amp;#91;dvsipc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt; #6 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff8800457736d0&amp;#93;&lt;/span&gt; dvsipc_send_ipc_request at ffffffffa053edd4 &lt;span class=&quot;error&quot;&gt;&amp;#91;dvsipc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt; #7 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff8800457736e0&amp;#93;&lt;/span&gt; send_ipc_request at ffffffffa053767d &lt;span class=&quot;error&quot;&gt;&amp;#91;dvsipc&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt; #8 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773708&amp;#93;&lt;/span&gt; dvsnet_send_ipc_request at ffffffffa0c3c2ba &lt;span class=&quot;error&quot;&gt;&amp;#91;dvsnet_if&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt; #9 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773720&amp;#93;&lt;/span&gt; kdwfs_send_transaction at ffffffffa03ad268 &lt;span class=&quot;error&quot;&gt;&amp;#91;kdwfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt;#10 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773758&amp;#93;&lt;/span&gt; kdwfs_send_transaction_retry at ffffffffa03ade23 &lt;span class=&quot;error&quot;&gt;&amp;#91;kdwfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt;#11 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773780&amp;#93;&lt;/span&gt; kdwfs_send_transaction_namespace_retry at ffffffffa03af54b &lt;span class=&quot;error&quot;&gt;&amp;#91;kdwfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt;#12 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff8800457737a0&amp;#93;&lt;/span&gt; kdwfs_send_unlink at ffffffffa03afddd &lt;span class=&quot;error&quot;&gt;&amp;#91;kdwfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt;#13 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff8800457737d8&amp;#93;&lt;/span&gt; kdwfs_evict_inode at ffffffffa03a78df &lt;span class=&quot;error&quot;&gt;&amp;#91;kdwfs&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt;#14 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff8800457737f8&amp;#93;&lt;/span&gt; evict at ffffffff81194e3c&lt;br/&gt;
&amp;gt;#15 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773820&amp;#93;&lt;/span&gt; iput at ffffffff81195685&lt;br/&gt;
&amp;gt;#16 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773850&amp;#93;&lt;/span&gt; __dentry_kill at ffffffff81191038&lt;br/&gt;
&amp;gt;#17 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773878&amp;#93;&lt;/span&gt; shrink_dentry_list at ffffffff81191393&lt;br/&gt;
&amp;gt;#18 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff8800457738a8&amp;#93;&lt;/span&gt; prune_dcache_sb at ffffffff81192827&lt;br/&gt;
&amp;gt;#19 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff8800457738e0&amp;#93;&lt;/span&gt; super_cache_scan at ffffffff8117e9a6&lt;br/&gt;
&amp;gt;#20 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773928&amp;#93;&lt;/span&gt; shrink_slab_node at ffffffff8112c6ac&lt;br/&gt;
&amp;gt;#21 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff8800457739b8&amp;#93;&lt;/span&gt; shrink_slab at ffffffff8112d355&lt;br/&gt;
&amp;gt;#22 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773a00&amp;#93;&lt;/span&gt; do_try_to_free_pages at ffffffff8113071f&lt;br/&gt;
&amp;gt;#23 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773aa0&amp;#93;&lt;/span&gt; try_to_free_pages at ffffffff8113093f&lt;br/&gt;
&amp;gt;#24 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773b10&amp;#93;&lt;/span&gt; __alloc_pages_nodemask at ffffffff81124ba5&lt;br/&gt;
&amp;gt;#25 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773c38&amp;#93;&lt;/span&gt; alloc_pages_current at ffffffff8116227a&lt;br/&gt;
&amp;gt;#26 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773c80&amp;#93;&lt;/span&gt; __vmalloc_node_range at ffffffff8115430a&lt;br/&gt;
&amp;gt;#27 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773cf0&amp;#93;&lt;/span&gt; vmalloc at ffffffff8115463b&lt;br/&gt;
&amp;gt;#28 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773d10&amp;#93;&lt;/span&gt; kgnilnd_rdma at ffffffffa05d6bed &lt;span class=&quot;error&quot;&gt;&amp;#91;kgnilnd&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt;#29 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773d98&amp;#93;&lt;/span&gt; kgnilnd_send_mapped_tx at ffffffffa05dd64e &lt;span class=&quot;error&quot;&gt;&amp;#91;kgnilnd&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt;#30 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773dd0&amp;#93;&lt;/span&gt; kgnilnd_process_mapped_tx at ffffffffa05e17d6 &lt;span class=&quot;error&quot;&gt;&amp;#91;kgnilnd&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt;#31 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773e60&amp;#93;&lt;/span&gt; kgnilnd_scheduler at ffffffffa05e383a &lt;span class=&quot;error&quot;&gt;&amp;#91;kgnilnd&amp;#93;&lt;/span&gt;&lt;br/&gt;
&amp;gt;#32 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773ed0&amp;#93;&lt;/span&gt; kthread at ffffffff81069ca0&lt;br/&gt;
&amp;gt;#33 &lt;span class=&quot;error&quot;&gt;&amp;#91;ffff880045773f50&amp;#93;&lt;/span&gt; ret_from_fork at ffffffff815ae788&lt;/p&gt;


&lt;p&gt;This stack trace shows that kgnilnd is trying to send... but it&apos;s using a vmalloc in the send path, probably due to the buffer to be sent being unaligned in some manner. So the vmalloc needs to fail, but we aren&apos;t using the __vmalloc that doesn&apos;t try to do IO, so we get stuck....&lt;/p&gt;&lt;/blockquote&gt;</description>
                <environment></environment>
        <key id="37981">LU-8368</key>
            <summary>Use kgnilnd_vzalloc() for copy buffer allocation</summary>
                <type id="1" iconUrl="https://jira.whamcloud.com/secure/viewavatar?size=xsmall&amp;avatarId=11303&amp;avatarType=issuetype">Bug</type>
                                            <priority id="4" iconUrl="https://jira.whamcloud.com/images/icons/priorities/minor.svg">Minor</priority>
                        <status id="5" iconUrl="https://jira.whamcloud.com/images/icons/statuses/resolved.png" description="A resolution has been taken, and it is awaiting verification by reporter. From here issues are either reopened, or are closed.">Resolved</status>
                    <statusCategory id="3" key="done" colorName="success"/>
                                    <resolution id="1">Fixed</resolution>
                                        <assignee username="wc-triage">WC Triage</assignee>
                                    <reporter username="hornc">Chris Horn</reporter>
                        <labels>
                            <label>patch</label>
                    </labels>
                <created>Tue, 5 Jul 2016 15:53:14 +0000</created>
                <updated>Wed, 13 Jul 2016 17:56:09 +0000</updated>
                            <resolved>Wed, 13 Jul 2016 17:56:09 +0000</resolved>
                                                    <fixVersion>Lustre 2.9.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                            <comments>
                            <comment id="157669" author="gerrit" created="Tue, 5 Jul 2016 16:01:02 +0000"  >&lt;p&gt;Chris Horn (hornc@cray.com) uploaded a new patch: &lt;a href=&quot;http://review.whamcloud.com/21154&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/21154&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8368&quot; title=&quot;Use kgnilnd_vzalloc() for copy buffer allocation&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8368&quot;&gt;&lt;del&gt;LU-8368&lt;/del&gt;&lt;/a&gt; gnilnd: Use kgnilnd_vzalloc() to avoid stalls&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: 1&lt;br/&gt;
Commit: b8eee782b9111b1a54ede2cd4c35886ba4f37273&lt;/p&gt;</comment>
                            <comment id="158441" author="gerrit" created="Mon, 11 Jul 2016 23:59:40 +0000"  >&lt;p&gt;Oleg Drokin (oleg.drokin@intel.com) merged in patch &lt;a href=&quot;http://review.whamcloud.com/21154/&quot; class=&quot;external-link&quot; target=&quot;_blank&quot; rel=&quot;nofollow noopener&quot;&gt;http://review.whamcloud.com/21154/&lt;/a&gt;&lt;br/&gt;
Subject: &lt;a href=&quot;https://jira.whamcloud.com/browse/LU-8368&quot; title=&quot;Use kgnilnd_vzalloc() for copy buffer allocation&quot; class=&quot;issue-link&quot; data-issue-key=&quot;LU-8368&quot;&gt;&lt;del&gt;LU-8368&lt;/del&gt;&lt;/a&gt; gnilnd: Use kgnilnd_vzalloc() to avoid stalls&lt;br/&gt;
Project: fs/lustre-release&lt;br/&gt;
Branch: master&lt;br/&gt;
Current Patch Set: &lt;br/&gt;
Commit: 6490222e0252dde0220eaa5337a25c66aa49fc88&lt;/p&gt;</comment>
                            <comment id="158651" author="jgmitter" created="Wed, 13 Jul 2016 17:56:09 +0000"  >&lt;p&gt;Landed to master for 2.9.0&lt;/p&gt;</comment>
                    </comments>
                    <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                            <customfield id="customfield_10890" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummary">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10390" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>1|hzygmv:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10090" key="com.pyxis.greenhopper.jira:gh-global-rank">
                        <customfieldname>Rank (Obsolete)</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>9223372036854775807</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10060" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Severity</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10022"><![CDATA[3]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                        </customfields>
    </item>
</channel>
</rss>